From 04cde9872a1bc497c8887f69cb857bcd52728c21 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 4 Jun 2024 17:53:31 -0600 Subject: [PATCH 01/33] IOSS: refactor to reduce strings; give flexibility and efficiency --- .../libraries/ioss/src/Ioss_ChainGenerator.C | 53 +++++++++++-------- .../libraries/ioss/src/Ioss_FaceGenerator.C | 13 +++-- .../libraries/ioss/src/Ioss_FaceGenerator.h | 10 ++-- 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C index 8f16e84524..7e173bf749 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C @@ -84,14 +84,15 @@ namespace { template void get_line_front(Ioss::SideSet *fs, const Ioss::ElementBlock *block, - const std::string &adj_block, Ioss::chain_t &element_chains, + Ioss::chain_t &element_chains, front_t &front) { + const auto adj_block_name = block->name(); Ioss::NameList blocks; fs->block_membership(blocks); for (const auto &fs_block : blocks) { - if (fs_block == adj_block) { - // This faceset has some elements that are in `adj_block` -- put those in the `front` + if (fs_block == adj_block_name) { + // This faceset has some elements that are in `adj_block_name` -- put those in the `front` // list. Get list of "sides" in this faceset... std::vector element_side; assert(fs->side_block_count() == 1); @@ -117,23 +118,19 @@ namespace { } template - front_t get_line_front(Ioss::Region ®ion, const std::string &adj_block, + front_t get_line_front(Ioss::Region ®ion, const Ioss::ElementBlock *block, Ioss::chain_t &element_chains, const std::string &surface_list) { front_t front; // Since lines can not cross element blocks, we can process everything a block at a time. 
- const auto *block = region.get_element_block(adj_block); assert(block != nullptr); - if (block->topology()->shape() != Ioss::ElementShape::HEX) { - fmt::print("Skipping Element Block {}; it does not contain HEX elements.\n", adj_block); - return front; - } + assert(block->topology()->shape() == Ioss::ElementShape::HEX); if (surface_list == "ALL") { const Ioss::SideSetContainer &fss = region.get_sidesets(); for (const auto &fs : fss) { - get_line_front(fs, block, adj_block, element_chains, front); + get_line_front(fs, block, element_chains, front); } } else { @@ -141,7 +138,7 @@ namespace { for (const auto &surface : selected_surfaces) { auto *sset = region.get_sideset(surface); if (sset != nullptr) { - get_line_front(sset, block, adj_block, element_chains, front); + get_line_front(sset, block, element_chains, front); } } } @@ -201,33 +198,47 @@ namespace Ioss { size_t numel = region.get_property("element_count").get_int(); Ioss::chain_t element_chains(numel); - // Generate the faces for use later... - Ioss::FaceGenerator face_generator(region); - face_generator.generate_faces((INT)0, true, true); - // Determine which element block(s) are adjacent to the faceset specifying "lines" // The `adjacent_blocks` contains the names of all element blocks that are adjacent to the // surface(s) that specify the faces at the 'root' of the lines... - Ioss::NameList adjacent_blocks = get_adjacent_blocks(region, surface_list); - if (adjacent_blocks.empty()) { + Ioss::NameList adjacent_block_names = get_adjacent_blocks(region, surface_list); + if (adjacent_block_names.empty()) { fmt::print("WARNING: No surfaces in the model matched the input surface list ({}).\n\tNo " "chains will be generated.\n", surface_list); } - for (const auto &adj_block : adjacent_blocks) { + + // Get the EB* corresponding to the EB names... 
+ Ioss::ElementBlockContainer adjacent_blocks; + adjacent_blocks.reserve(adjacent_block_names.size()); + for (const auto &blk_name : adjacent_block_names) { + auto *eb = region.get_element_block(blk_name); + assert(eb != nullptr); + if (eb->topology()->shape() != Ioss::ElementShape::HEX) { + fmt::print("Skipping Element Block {}; it does not contain HEX elements.\n", blk_name); + } + else { + adjacent_blocks.push_back(eb); + } + } + + // Generate the faces for use later... (only generate on the blocks touching the front) + Ioss::FaceGenerator face_generator(region); + face_generator.generate_block_faces(adjacent_blocks, (INT)0, true); + + for (const auto *block : adjacent_blocks) { // Get the offset into the element_chains vector... - const auto *block = region.get_element_block(adj_block); auto offset = block->get_offset() + 1; auto count = block->entity_count(); - auto front = get_line_front(region, adj_block, element_chains, surface_list); + auto front = get_line_front(region, block, element_chains, surface_list); if (front.empty()) { continue; } // We want a vector giving us the Face for each face of each element in the block... 
connectivity_t face_connectivity(count); - generate_face_connectivity(face_generator.faces(adj_block), static_cast(offset), + generate_face_connectivity(face_generator.faces(block), static_cast(offset), face_connectivity); // For each face on the "front" (at the beginning the boundary sideset faces) diff --git a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C index 0badfc563b..d7b300d672 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C @@ -327,6 +327,12 @@ namespace Ioss { FaceGenerator::FaceGenerator(Ioss::Region ®ion) : region_(region) {} + FaceUnorderedSet &FaceGenerator::faces(const Ioss::ElementBlock *block) + { + auto name = block->name(); + return faces_[name]; + } + template IOSS_EXPORT void FaceGenerator::generate_faces(int, bool, bool); template IOSS_EXPORT void FaceGenerator::generate_faces(int64_t, bool, bool); @@ -334,7 +340,8 @@ namespace Ioss { void FaceGenerator::generate_faces(INT /*dummy*/, bool block_by_block, bool local_ids) { if (block_by_block) { - generate_block_faces(INT(0), local_ids); + const auto &ebs = region_.get_element_blocks(); + generate_block_faces(ebs, INT(0), local_ids); } else { generate_model_faces(INT(0), local_ids); @@ -349,7 +356,8 @@ namespace Ioss { } } - template void FaceGenerator::generate_block_faces(INT /*dummy*/, bool local_ids) + template void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &ebs, + INT /*dummy*/, bool local_ids) { // Convert ids into hashed-ids Ioss::NodeBlock *nb = region_.get_node_blocks()[0]; @@ -370,7 +378,6 @@ namespace Ioss { auto endh = std::chrono::steady_clock::now(); #endif - const auto &ebs = region_.get_element_blocks(); for (const auto &eb : ebs) { const std::string &name = eb->name(); size_t numel = eb->entity_count(); diff --git a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h 
b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h index d88745ffff..c8d4651751 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h @@ -15,6 +15,7 @@ #include #include "ioss_export.h" +#include "Ioss_Region.h" #define FG_USE_ROBIN #if defined FG_USE_STD @@ -28,8 +29,7 @@ #include namespace Ioss { - class Region; - + class ElementBlock; class IOSS_EXPORT Face { public: @@ -123,14 +123,18 @@ namespace Ioss { template void generate_faces(INT /*dummy*/, bool block_by_block = false, bool local_ids = false); + template + void generate_block_faces(const ElementBlockContainer &ebs, + INT /*dummy*/, bool local_ids = false); + FaceUnorderedSet &faces(const std::string &name = "ALL") { return faces_[name]; } + FaceUnorderedSet &faces(const ElementBlock *block); //! Given a local node id (0-based), return the hashed value. size_t node_id_hash(size_t local_node_id) const { return hashIds_[local_node_id]; } private: template void hash_node_ids(const std::vector &node_ids); - template void generate_block_faces(INT /*dummy*/, bool local_ids); template void generate_model_faces(INT /*dummy*/, bool local_ids); Ioss::Region ®ion_; From f22e41dfe2fc6f905eaf85bde41cd2754f69643f Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 4 Jun 2024 17:54:19 -0600 Subject: [PATCH 02/33] IOSS: Start of support for parallel line decomp --- .../libraries/ioss/src/Ioss_Decomposition.C | 27 +++++++++++++++++++ .../libraries/ioss/src/Ioss_Decomposition.h | 3 +++ .../ioss/src/exodus/Ioex_DecompositionData.C | 7 ++++- .../ioss/src/exodus/Ioex_DecompositionData.h | 2 ++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index efda361b47..add41b9939 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -7,6 +7,7 @@ 
*/ #include "Ioss_Decomposition.h" +#include "Ioss_ChainGenerator.h" #include "Ioss_ElementTopology.h" #include "Ioss_Enumerate.h" #include "Ioss_ParallelUtils.h" @@ -490,6 +491,31 @@ namespace Ioss { show_progress("\tIoss::decompose model finished"); } + template IOSS_EXPORT void Decomposition::calculate_element_chains(); + template IOSS_EXPORT void Decomposition::calculate_element_chains(); + template + void Decomposition::calculate_element_chains() + { +#if 0 + Ioss::chain_t element_chains; + std::vector weights; + element_chains = + Ioss::generate_element_chains(region, interFace.lineSurfaceList_, debug_level, dummy); + progress("Ioss::generate_element_chains"); + + if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || + interFace.decomposition_method() == "hsfc") { + weights = + line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + progress("generate_element_weights"); + } + + if (weights.empty()) { + weights.resize(region.get_property("element_count").get_int(), 1); + } +#endif + } + template IOSS_EXPORT void Decomposition::calculate_element_centroids( const std::vector &x, const std::vector &y, const std::vector &z); template IOSS_EXPORT void Decomposition::calculate_element_centroids( @@ -1080,6 +1106,7 @@ namespace Ioss { #endif #if !defined(NO_ZOLTAN_SUPPORT) + template void Decomposition::zoltan_decompose(Zoltan &zz) { show_progress(__func__); diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h index 5bae8e227b..80feca4c10 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h @@ -281,6 +281,8 @@ namespace Ioss { void calculate_element_centroids(const std::vector &x, const std::vector &y, const std::vector &z); + void calculate_element_chains(); + #if !defined(NO_ZOLTAN_SUPPORT) void zoltan_decompose(Zoltan &zz); @@ -806,6 +808,7 @@ 
namespace Ioss { std::vector m_elementToProc; // Used by "MAP" scheme... std::vector m_centroids; + std::vector m_weights; std::vector m_pointer; // Index into adjacency, processor list for each element... std::vector m_adjacency; // Size is sum of element connectivity sizes diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 0c4ab1e54b..edf495ec43 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -79,7 +79,12 @@ namespace { } if (wdim != 0) { - std::fill(wgts, wgts + element_count, 1.0); + if (zdata->weights().empty()) { + std::fill(wgts, wgts + element_count, 1.0); + } + else { + std::copy(zdata->weights().begin(), zdata->weights().end(), &wgts[0]); + } } if (ngid_ent == 1) { diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h index 651476b517..3dd04955fd 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h @@ -66,6 +66,7 @@ namespace Ioex { IOSS_NODISCARD virtual size_t decomp_elem_count() const = 0; IOSS_NODISCARD virtual std::vector ¢roids() = 0; + IOSS_NODISCARD virtual std::vector &weights() = 0; Ioss_MPI_Comm comm_; @@ -136,6 +137,7 @@ namespace Ioex { IOSS_NODISCARD size_t decomp_elem_count() const { return m_decomposition.file_elem_count(); } IOSS_NODISCARD std::vector ¢roids() { return m_decomposition.m_centroids; } + IOSS_NODISCARD std::vector &weights() { return m_decomposition.m_weights; } template void communicate_element_data(T *file_data, T *ioss_data, size_t comp_count) const From d7b3b576cc4002629ded9289fd481b82fe5d2cd7 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 4 Jun 2024 21:37:30 -0600 Subject: [PATCH 03/33] IOSS: Explicit template 
instantiation fix --- packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C index d7b300d672..c9ddaa6b02 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C @@ -1,4 +1,4 @@ -// Copyright(C) 1999-2023 National Technology & Engineering Solutions +// Copyright(C) 1999-2024 National Technology & Engineering Solutions // of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with // NTESS, the U.S. Government retains certain rights in this software. // @@ -356,6 +356,9 @@ namespace Ioss { } } + template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, int, bool); + template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, int64_t, bool); + template void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &ebs, INT /*dummy*/, bool local_ids) { From 637c85523e8a3cc43b03f41d31fabd8e390f142e Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 11:56:00 -0600 Subject: [PATCH 04/33] IOSS: Allow passing filename down into line_decomp --- .../ioss/src/exodus/Ioex_DecompositionData.h | 14 +++++++------- .../ioss/src/exodus/Ioex_ParallelDatabaseIO.C | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h index 3dd04955fd..5faa8dc3c2 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.h @@ -50,11 +50,11 @@ namespace Ioex { DecompositionDataBase(const DecompositionDataBase &) = delete; DecompositionDataBase &operator=(const DecompositionDataBase &) = 
delete; - virtual ~DecompositionDataBase() = default; - IOSS_NODISCARD virtual int int_size() const = 0; - virtual void decompose_model(int filePtr) = 0; - IOSS_NODISCARD virtual size_t ioss_node_count() const = 0; - IOSS_NODISCARD virtual size_t ioss_elem_count() const = 0; + virtual ~DecompositionDataBase() = default; + IOSS_NODISCARD virtual int int_size() const = 0; + virtual void decompose_model(int filePtr, const std::string &filename) = 0; + IOSS_NODISCARD virtual size_t ioss_node_count() const = 0; + IOSS_NODISCARD virtual size_t ioss_elem_count() const = 0; IOSS_NODISCARD virtual int spatial_dimension() const = 0; IOSS_NODISCARD virtual size_t global_node_count() const = 0; @@ -66,7 +66,7 @@ namespace Ioex { IOSS_NODISCARD virtual size_t decomp_elem_count() const = 0; IOSS_NODISCARD virtual std::vector ¢roids() = 0; - IOSS_NODISCARD virtual std::vector &weights() = 0; + IOSS_NODISCARD virtual std::vector &weights() = 0; Ioss_MPI_Comm comm_; @@ -121,7 +121,7 @@ namespace Ioex { IOSS_NODISCARD int int_size() const { return sizeof(INT); } - void decompose_model(int filePtr); + void decompose_model(int filePtr, const std::string &filename); IOSS_NODISCARD int spatial_dimension() const { return m_decomposition.m_spatialDimension; } diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C index f819428074..ebe977380c 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C @@ -779,11 +779,11 @@ namespace Ioex { decomp = std::make_unique>(properties, util().communicator()); } assert(decomp != nullptr); - decomp->decompose_model(exoid); + decomp->decompose_model(exoid, get_filename()); read_region(); - Ioex::read_exodus_basis(get_file_pointer()); - Ioex::read_exodus_quadrature(get_file_pointer()); + Ioex::read_exodus_basis(exoid); + Ioex::read_exodus_quadrature(exoid); 
get_elemblocks(); From 0df8dfea31e2e222515746ab0491542ec3c9a734 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 11:57:44 -0600 Subject: [PATCH 05/33] IOSS: initial line_decompose(); compile/link does not run --- .../libraries/ioss/src/Ioss_Decomposition.C | 23 +- .../libraries/ioss/src/Ioss_Decomposition.h | 2 + .../ioss/src/exodus/Ioex_DecompositionData.C | 398 +++++++++++++++++- 3 files changed, 416 insertions(+), 7 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index add41b9939..ba69451a94 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -7,7 +7,6 @@ */ #include "Ioss_Decomposition.h" -#include "Ioss_ChainGenerator.h" #include "Ioss_ElementTopology.h" #include "Ioss_Enumerate.h" #include "Ioss_ParallelUtils.h" @@ -302,6 +301,13 @@ namespace Ioss { props.get("PARMETIS_COMMON_NODE_COUNT").get_int() > 0) { m_commonNodeCount = props.get("PARMETIS_COMMON_NODE_COUNT").get_int(); } + + if (props.exists("LINE_DECOMPOSITION")) { + // The value of the property should be a comma-separated list of surface/sideset names from which the lines will grow, + // or the value "ALL" for all surfaces in the model. + m_lineDecomp = true; + m_decompExtra = props.get("LINE_DECOMPOSITION").get_string(); + } } template IOSS_EXPORT void @@ -453,6 +459,12 @@ namespace Ioss { if (m_method == "MAP") { guided_decompose(); } + if (m_method == "SPECIFIED") { + // Currently used for line decomposition with another decomposition type. + // The line-modified decomposition is done prior to this and builds the + // m_elementToProc which is then used here to decompose the elements... 
+ guided_decompose(); + } show_progress("\tfinished with decomposition method"); Ioss::sort(importElementMap); @@ -1105,6 +1117,15 @@ namespace Ioss { } #endif + template void Decomposition::line_decompose() + { + show_progress(__func__); + // Currently, we perform the line decomposition in serial on rank + // 0 and then broadcast the `elementToProc` map to each rank which + // then does a guided decompostion. + + } + #if !defined(NO_ZOLTAN_SUPPORT) template void Decomposition::zoltan_decompose(Zoltan &zz) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h index 80feca4c10..b57ad90ba5 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h @@ -277,6 +277,7 @@ namespace Ioss { void simple_decompose(); void simple_node_decompose(); void guided_decompose(); + void line_decompose(); void calculate_element_centroids(const std::vector &x, const std::vector &y, const std::vector &z); @@ -803,6 +804,7 @@ namespace Ioss { size_t m_importPreLocalNodeIndex{0}; bool m_retainFreeNodes{true}; + bool m_lineDecomp{false}; bool m_showProgress{false}; bool m_showHWM{false}; diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index edf495ec43..83b0bb207a 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -7,10 +7,14 @@ #include "Ioss_CodeTypes.h" #include "exodus/Ioex_DecompositionData.h" #if defined PARALLEL_AWARE_EXODUS -#include "Ioss_ElementTopology.h" // for ElementTopology -#include "Ioss_Field.h" // for Field, etc -#include "Ioss_Map.h" // for Map, MapContainer -#include "Ioss_PropertyManager.h" // for PropertyManager +#include "Ioss_ChainGenerator.h" +#include "Ioss_ElementTopology.h" +#include "Ioss_Field.h" +#include 
"Ioss_IOFactory.h" +#include "Ioss_Map.h" +#include "Ioss_NodeBlock.h" +#include "Ioss_PropertyManager.h" +#include "Ioss_Region.h" #include "Ioss_SmartAssert.h" #include "Ioss_Sort.h" #include "Ioss_Utils.h" @@ -39,8 +43,8 @@ #endif namespace { - // ZOLTAN Callback functions... + // ZOLTAN Callback functions... #if !defined(NO_ZOLTAN_SUPPORT) int zoltan_num_dim(void *data, int *ierr) { @@ -112,6 +116,350 @@ namespace { *ierr = ZOLTAN_OK; } #endif + +template +std::map> string_chains(const Ioss::chain_t &element_chains) +{ + std::map> chains; + + for (size_t i = 0; i < element_chains.size(); i++) { + auto &chain_entry = element_chains[i]; + if (chain_entry.link >= 0) { + chains[chain_entry.element].push_back(i + 1); + } + } + return chains; +} + +template +std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) +{ + int debug_level = 0; + auto chains = string_chains(element_chains); + + if ((debug_level & 16) != 0) { + for (const auto &[chain_root, chain_elements] : chains) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + } + + std::vector weights(element_count, 1); + // Now, for each chain... + for (const auto &[chain_root, chain_elements] : chains) { + // * Set the weights of all elements in the chain... + // * non-root = 0, root = length of chain. + for (const auto &element : chain_elements) { + weights[element - 1] = 0; + } + weights[chain_root - 1] = static_cast(chain_elements.size()); + } + return weights; +} +template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); +template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); + + +template +void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, + int proc_count) +{ + int debug_level = 0; + // Get a map of all chains and the elements in the chains. Map key will be root. 
+ auto chains = string_chains(element_chains); + + // Delta: elements added/removed from each processor... + std::vector delta(proc_count); + + // Now, for each chain... + for (const auto &[chain_root, chain_elements] : chains) { + if ((debug_level & 16) != 0) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + + std::vector chain_proc_count(proc_count); + + // * get processors used by elements in the chain... + for (const auto &element : chain_elements) { + auto proc = elem_to_proc[element - 1]; + chain_proc_count[proc]++; + } + + // * Now, subtract the `delta` from each count + for (int i = 0; i < proc_count; i++) { + chain_proc_count[i] -= delta[i]; + } + + // * Assign all elements in the chain to processor at chain root + // * Update the deltas for all processors that gain/lose elements... + auto root_proc = elem_to_proc[chain_root - 1]; + for (const auto &element : chain_elements) { + if (elem_to_proc[element - 1] != root_proc) { + auto old_proc = elem_to_proc[element - 1]; + elem_to_proc[element - 1] = root_proc; + delta[root_proc]++; + delta[old_proc]--; + } + } + } + + std::vector proc_element_count(proc_count); + for (auto proc : elem_to_proc) { + proc_element_count[proc]++; + } + if ((debug_level & 32) != 0) { + fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); + fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); + } +} + + template + std::tuple, std::vector, std::vector> + get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) + { + size_t element_count = region.get_property("element_count").get_int(); + + // The zoltan methods supported in slice are all geometry based + // and use the element centroid. 
+ std::vector x(element_count); + std::vector y(element_count); + std::vector z(element_count); + + const auto *nb = region.get_node_blocks()[0]; + std::vector coor; + nb->get_field_data("mesh_model_coordinates", coor); + + const auto &blocks = region.get_element_blocks(); + size_t el = 0; + for (auto &eb : blocks) { + std::vector connectivity; + eb->get_field_data("connectivity_raw", connectivity); + size_t blk_element_count = eb->entity_count(); + size_t blk_element_nodes = eb->topology()->number_nodes(); + + for (size_t j = 0; j < blk_element_count; j++) { + for (size_t k = 0; k < blk_element_nodes; k++) { + auto node = connectivity[j * blk_element_nodes + k] - 1; + x[el] += coor[node * 3 + 0]; + y[el] += coor[node * 3 + 1]; + z[el] += coor[node * 3 + 2]; + } + x[el] /= blk_element_nodes; + y[el] /= blk_element_nodes; + z[el] /= blk_element_nodes; + el++; + } + } + return {x, y, z}; + } + /*****************************************************************************/ + /***** Global data structure used by Zoltan callbacks. *****/ + /***** Could implement Zoltan callbacks without global data structure, *****/ + /***** but using the global data structure makes implementation quick. *****/ + struct + { + size_t ndot; /* Length of x, y, z, and part (== # of elements) */ + float *vwgt; /* vertex weights */ + double *x; /* x-coordinates */ + double *y; /* y-coordinates */ + double *z; /* z-coordinates */ + } Zoltan_Data; + + /*****************************************************************************/ + /***** ZOLTAN CALLBACK FUNCTIONS *****/ + int zoltan_num_dim_ser(void * /*data*/, int *ierr) + { + /* Return dimensionality of coordinate data. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + *ierr = ZOLTAN_OK; + if (Zoltan_Data.z != nullptr) { + return 3; + } + if (Zoltan_Data.y != nullptr) { + return 2; + } + return 1; + } + + int zoltan_num_obj_ser(void * /*data*/, int *ierr) + { + /* Return number of objects. 
+ * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + *ierr = ZOLTAN_OK; + return Zoltan_Data.ndot; + } + + void zoltan_obj_list_ser(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids, + ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr) + { + /* Return list of object IDs. + * Return only global IDs; don't need local IDs since running in serial. + * gids are array indices for coordinate and vwgts arrays. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + std::iota(gids, gids + Zoltan_Data.ndot, 0); + if (wdim != 0) { + for (size_t i = 0; i < Zoltan_Data.ndot; i++) { + wgts[i] = static_cast(Zoltan_Data.vwgt[i]); + } + } + + *ierr = ZOLTAN_OK; + } + + void zoltan_geom_ser(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj, + const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom, + int *ierr) + { + /* Return coordinates for objects. + * gids are array indices for coordinate arrays. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. 
+ */ + + for (size_t i = 0; i < static_cast(nobj); i++) { + size_t j = gids[i]; + geom[i * ndim] = Zoltan_Data.x[j]; + if (ndim > 1) { + geom[i * ndim + 1] = Zoltan_Data.y[j]; + } + if (ndim > 2) { + geom[i * ndim + 2] = Zoltan_Data.z[j]; + } + } + + *ierr = ZOLTAN_OK; + } + + template + void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + IOSS_MAYBE_UNUSED INT dummy) + { + if (ranks == 1) { + return; + } + + size_t element_count = region.get_property("element_count").get_int(); + if (element_count != static_cast(static_cast(element_count))) { + fmt::print(stderr, "ERROR: Cannot have a mesh with more than 2.1 Billion elements in a " + "Zoltan decomposition.\n"); + exit(EXIT_FAILURE); + } + + auto [x, y, z] = get_element_centroid(region, dummy); + + // Copy mesh data and pointers into structure accessible from callback fns. + Zoltan_Data.ndot = element_count; + Zoltan_Data.vwgt = const_cast(Data(weights)); + + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(y); + Zoltan_Data.z = Data(z); + + // Initialize Zoltan + int argc = 0; + char **argv = nullptr; + + float ver = 0.0; + Zoltan_Initialize(argc, argv, &ver); + fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), + method); + + Zoltan zz(Ioss::ParallelUtils::comm_self()); + + // Register Callback functions + // Using global Zoltan_Data; could register it here instead as data field. 
+ zz.Set_Num_Obj_Fn(zoltan_num_obj_ser, nullptr); + zz.Set_Obj_List_Fn(zoltan_obj_list_ser, nullptr); + zz.Set_Num_Geom_Fn(zoltan_num_dim_ser, nullptr); + zz.Set_Geom_Multi_Fn(zoltan_geom_ser, nullptr); + + // Set parameters for Zoltan + zz.Set_Param("DEBUG_LEVEL", "0"); + std::string str = fmt::format("{}", ranks); + zz.Set_Param("NUM_GLOBAL_PARTS", str); + zz.Set_Param("OBJ_WEIGHT_DIM", "1"); + zz.Set_Param("LB_METHOD", method); + zz.Set_Param("NUM_LID_ENTRIES", "0"); + zz.Set_Param("REMAP", "0"); + zz.Set_Param("RETURN_LISTS", "PARTITION_ASSIGNMENTS"); + zz.Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); + + int num_global = sizeof(INT) / sizeof(ZOLTAN_ID_TYPE); + num_global = num_global < 1 ? 1 : num_global; + + // Call partitioner + int changes = 0; + int num_local = 0; + int num_import = 1; + int num_export = 1; + ZOLTAN_ID_PTR import_global_ids = nullptr; + ZOLTAN_ID_PTR import_local_ids = nullptr; + ZOLTAN_ID_PTR export_global_ids = nullptr; + ZOLTAN_ID_PTR export_local_ids = nullptr; + int *import_procs = nullptr; + int *import_to_part = nullptr; + int *export_procs = nullptr; + int *export_to_part = nullptr; + int rc = zz.LB_Partition(changes, num_global, num_local, num_import, import_global_ids, + import_local_ids, import_procs, import_to_part, num_export, + export_global_ids, export_local_ids, export_procs, export_to_part); + + if (rc != ZOLTAN_OK) { + fmt::print(stderr, "ERROR: Problem during call to Zoltan LB_Partition.\n"); + goto End; + } + + // Sanity check + if (element_count != static_cast(num_export)) { + fmt::print(stderr, "Sanity check failed; ndot {} != num_export {}.\n", element_count, + static_cast(num_export)); + goto End; + } + + elem_to_proc.resize(element_count); + for (size_t i = 0; i < element_count; i++) { + elem_to_proc[i] = export_to_part[i]; + } + + End: + /* Clean up */ + Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); + Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, 
&export_procs, &export_to_part); + } + +template void line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); +template void line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); + + int line_decompose(Ioss::Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector element_to_proc) + { + + int dummy = 0; + Ioss::chain_t element_chains = + Ioss::generate_element_chains(region, surface_list, 0, dummy); + region.get_database()->progress("Ioss::generate_element_chains"); + + std::vector weights = + line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + region.get_database()->progress("generate_element_weights"); + + double start = Ioss::Utils::timer(); + std::vector elem_to_proc; + decompose_zoltan(region, num_ranks, method, elem_to_proc, weights, dummy); + double end = Ioss::Utils::timer(); + fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); + region.get_database()->progress("exit decompose_elements"); + + // Make sure all elements on a chain are on the same processor rank... + line_decomp_modify(element_chains, elem_to_proc, num_ranks); + + return 1; + } } // namespace namespace Ioex { @@ -130,7 +478,7 @@ namespace Ioex { m_processorCount = pu.parallel_size(); } - template void DecompositionData::decompose_model(int filePtr) + template void DecompositionData::decompose_model(int filePtr, const std::string &filename) { m_decomposition.show_progress(__func__); // Initial decomposition is linear where processor #p contains @@ -261,6 +609,44 @@ namespace Ioex { } } + if (m_decomposition.m_lineDecomp) { + // For first iteration of this, we do the line-decomp modified decomposition on a single rank + // and then communicate the m_elementToProc vector to each of the ranks. This is then used + // do do the parallel distributions/decomposition of the elements assuming a "guided" decomposition. 
+ std::vector element_to_proc_global{}; + + if (m_processor == 0) { + Ioss::PropertyManager properties; + Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, + Ioss::ParallelUtils::comm_self(), properties); + Ioss::Region region(dbi, "line_decomp_region"); + int status = line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global); + } + + // Now broadcast the parts of the `element_to_proc_global` + // vector to the owning ranks in the initial linear + // decomposition... + + std::vector sendcounts(m_processorCount); + std::vector displs(m_processorCount); + m_decomposition.m_elementToProc.resize(decomp_elem_count()); + + // calculate send counts and displacements + int sum = 0; + int rem = globalElementCount % m_processorCount; + for (int i = 0; i < m_processorCount; i++) { + sendcounts[i] = globalElementCount/m_processorCount; + if (rem > 0) { + sendcounts[i]++; + rem--; + } + displs[i] = sum; + sum += sendcounts[i]; + } + MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, Data(m_decomposition.m_elementToProc), decomp_elem_count(), Ioss::mpi_type(INT(0)), 0, m_decomposition.m_comm); + } + + #if !defined(NO_ZOLTAN_SUPPORT) float version = 0.0; Zoltan_Initialize(0, nullptr, &version); From 5f4c4828cfa471d2e21511150f2b684811ee99f6 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 14:42:06 -0600 Subject: [PATCH 06/33] IOSS: Refactor to try to eliminate duplicate code --- .../libraries/ioss/src/Ioss_Decomposition.C | 24 -- .../libraries/ioss/src/Ioss_Decomposition.h | 2 - .../ioss/src/Ioss_DecompositionUtils.C | 376 ++++++++++++++++++ .../ioss/src/Ioss_DecompositionUtils.h | 33 ++ .../ioss/src/exodus/Ioex_DecompositionData.C | 349 +--------------- 5 files changed, 412 insertions(+), 372 deletions(-) create mode 100644 packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C create mode 100644 
packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index ba69451a94..0760e990ff 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -503,30 +503,6 @@ namespace Ioss { show_progress("\tIoss::decompose model finished"); } - template IOSS_EXPORT void Decomposition::calculate_element_chains(); - template IOSS_EXPORT void Decomposition::calculate_element_chains(); - template - void Decomposition::calculate_element_chains() - { -#if 0 - Ioss::chain_t element_chains; - std::vector weights; - element_chains = - Ioss::generate_element_chains(region, interFace.lineSurfaceList_, debug_level, dummy); - progress("Ioss::generate_element_chains"); - - if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || - interFace.decomposition_method() == "hsfc") { - weights = - line_decomp_weights(element_chains, region.get_property("element_count").get_int()); - progress("generate_element_weights"); - } - - if (weights.empty()) { - weights.resize(region.get_property("element_count").get_int(), 1); - } -#endif - } template IOSS_EXPORT void Decomposition::calculate_element_centroids( const std::vector &x, const std::vector &y, const std::vector &z); diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h index b57ad90ba5..f5a239b635 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h @@ -282,8 +282,6 @@ namespace Ioss { void calculate_element_centroids(const std::vector &x, const std::vector &y, const std::vector &z); - void calculate_element_chains(); - #if !defined(NO_ZOLTAN_SUPPORT) void zoltan_decompose(Zoltan &zz); diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C 
b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C new file mode 100644 index 0000000000..2a4f7034b9 --- /dev/null +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -0,0 +1,376 @@ +/* + * Copyright(C) 2024 National Technology & Engineering Solutions + * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with + * NTESS, the U.S. Government retains certain rights in this software. + * + * See packages/seacas/LICENSE for details + */ + +#include +#include + +#include "Ioss_DecompositionUtils.h" +#include "Ioss_CodeTypes.h" +#include "Ioss_ChainGenerator.h" +#include "Ioss_Decomposition.h" +#include "Ioss_NodeBlock.h" +#include "Ioss_ElementBlock.h" +#include "Ioss_Region.h" + +#include "Ioss_ChainGenerator.h" + +#if !defined(NO_ZOLTAN_SUPPORT) +#include // for Zoltan_Initialize +#include // for Zoltan +#endif + +namespace { +#if !defined(NO_ZOLTAN_SUPPORT) + /*****************************************************************************/ + /***** Global data structure used by Zoltan callbacks. *****/ + /***** Could implement Zoltan callbacks without global data structure, *****/ + /***** but using the global data structure makes implementation quick. *****/ + struct + { + size_t ndot; /* Length of x, y, z, and part (== # of elements) */ + float *vwgt; /* vertex weights */ + double *x; /* x-coordinates */ + double *y; /* y-coordinates */ + double *z; /* z-coordinates */ + } Zoltan_Data; + + /*****************************************************************************/ + /***** ZOLTAN CALLBACK FUNCTIONS *****/ + int zoltan_num_dim(void * /*data*/, int *ierr) + { + /* Return dimensionality of coordinate data. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + *ierr = ZOLTAN_OK; + if (Zoltan_Data.z != nullptr) { + return 3; + } + if (Zoltan_Data.y != nullptr) { + return 2; + } + return 1; + } + + int zoltan_num_obj(void * /*data*/, int *ierr) + { + /* Return number of objects. 
+ * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + *ierr = ZOLTAN_OK; + return Zoltan_Data.ndot; + } + + void zoltan_obj_list(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids, + ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr) + { + /* Return list of object IDs. + * Return only global IDs; don't need local IDs since running in serial. + * gids are array indices for coordinate and vwgts arrays. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + std::iota(gids, gids + Zoltan_Data.ndot, 0); + if (wdim != 0) { + for (size_t i = 0; i < Zoltan_Data.ndot; i++) { + wgts[i] = static_cast(Zoltan_Data.vwgt[i]); + } + } + + *ierr = ZOLTAN_OK; + } + + void zoltan_geom(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj, + const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom, + int *ierr) + { + /* Return coordinates for objects. + * gids are array indices for coordinate arrays. + * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. + */ + + for (size_t i = 0; i < static_cast(nobj); i++) { + size_t j = gids[i]; + geom[i * ndim] = Zoltan_Data.x[j]; + if (ndim > 1) { + geom[i * ndim + 1] = Zoltan_Data.y[j]; + } + if (ndim > 2) { + geom[i * ndim + 2] = Zoltan_Data.z[j]; + } + } + + *ierr = ZOLTAN_OK; + } +#endif + +template +std::map> string_chains(const Ioss::chain_t &element_chains) +{ + std::map> chains; + + for (size_t i = 0; i < element_chains.size(); i++) { + auto &chain_entry = element_chains[i]; + if (chain_entry.link >= 0) { + chains[chain_entry.element].push_back(i + 1); + } + } + return chains; +} + + template + std::tuple, std::vector, std::vector> + get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) + { + size_t element_count = region.get_property("element_count").get_int(); + + // The zoltan methods supported in slice are all geometry based + // and use the element centroid. 
+ std::vector x(element_count); + std::vector y(element_count); + std::vector z(element_count); + + const auto *nb = region.get_node_blocks()[0]; + std::vector coor; + nb->get_field_data("mesh_model_coordinates", coor); + + const auto &blocks = region.get_element_blocks(); + size_t el = 0; + for (auto &eb : blocks) { + std::vector connectivity; + eb->get_field_data("connectivity_raw", connectivity); + size_t blk_element_count = eb->entity_count(); + size_t blk_element_nodes = eb->topology()->number_nodes(); + + for (size_t j = 0; j < blk_element_count; j++) { + for (size_t k = 0; k < blk_element_nodes; k++) { + auto node = connectivity[j * blk_element_nodes + k] - 1; + x[el] += coor[node * 3 + 0]; + y[el] += coor[node * 3 + 1]; + z[el] += coor[node * 3 + 2]; + } + x[el] /= blk_element_nodes; + y[el] /= blk_element_nodes; + z[el] /= blk_element_nodes; + el++; + } + } + return {x, y, z}; + } + + template + void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + IOSS_MAYBE_UNUSED INT dummy) + { + if (ranks == 1) { + return; + } + + size_t element_count = region.get_property("element_count").get_int(); + if (element_count != static_cast(static_cast(element_count))) { + fmt::print(stderr, "ERROR: Cannot have a mesh with more than 2.1 Billion elements in a " + "Zoltan decomposition.\n"); + exit(EXIT_FAILURE); + } + + auto [x, y, z] = get_element_centroid(region, dummy); + + // Copy mesh data and pointers into structure accessible from callback fns. 
+ Zoltan_Data.ndot = element_count; + Zoltan_Data.vwgt = const_cast(Data(weights)); + + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(y); + Zoltan_Data.z = Data(z); + + // Initialize Zoltan + int argc = 0; + char **argv = nullptr; + + float ver = 0.0; + Zoltan_Initialize(argc, argv, &ver); + fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), + method); + + Zoltan zz(Ioss::ParallelUtils::comm_self()); + + // Register Callback functions + // Using global Zoltan_Data; could register it here instead as data field. + zz.Set_Num_Obj_Fn(zoltan_num_obj, nullptr); + zz.Set_Obj_List_Fn(zoltan_obj_list, nullptr); + zz.Set_Num_Geom_Fn(zoltan_num_dim, nullptr); + zz.Set_Geom_Multi_Fn(zoltan_geom, nullptr); + + // Set parameters for Zoltan + zz.Set_Param("DEBUG_LEVEL", "0"); + std::string str = fmt::format("{}", ranks); + zz.Set_Param("NUM_GLOBAL_PARTS", str); + zz.Set_Param("OBJ_WEIGHT_DIM", "1"); + zz.Set_Param("LB_METHOD", method); + zz.Set_Param("NUM_LID_ENTRIES", "0"); + zz.Set_Param("REMAP", "0"); + zz.Set_Param("RETURN_LISTS", "PARTITION_ASSIGNMENTS"); + zz.Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); + + int num_global = sizeof(INT) / sizeof(ZOLTAN_ID_TYPE); + num_global = num_global < 1 ? 
1 : num_global; + + // Call partitioner + int changes = 0; + int num_local = 0; + int num_import = 1; + int num_export = 1; + ZOLTAN_ID_PTR import_global_ids = nullptr; + ZOLTAN_ID_PTR import_local_ids = nullptr; + ZOLTAN_ID_PTR export_global_ids = nullptr; + ZOLTAN_ID_PTR export_local_ids = nullptr; + int *import_procs = nullptr; + int *import_to_part = nullptr; + int *export_procs = nullptr; + int *export_to_part = nullptr; + int rc = zz.LB_Partition(changes, num_global, num_local, num_import, import_global_ids, + import_local_ids, import_procs, import_to_part, num_export, + export_global_ids, export_local_ids, export_procs, export_to_part); + + if (rc != ZOLTAN_OK) { + fmt::print(stderr, "ERROR: Problem during call to Zoltan LB_Partition.\n"); + goto End; + } + + // Sanity check + if (element_count != static_cast(num_export)) { + fmt::print(stderr, "Sanity check failed; ndot {} != num_export {}.\n", element_count, + static_cast(num_export)); + goto End; + } + + elem_to_proc.resize(element_count); + for (size_t i = 0; i < element_count; i++) { + elem_to_proc[i] = export_to_part[i]; + } + + End: + /* Clean up */ + Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); + Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); + } +} + +namespace Ioss { +int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc) +{ + int dummy = 0; + Ioss::chain_t element_chains = + Ioss::generate_element_chains(region, surface_list, 0, dummy); + region.get_database()->progress("Ioss::generate_element_chains"); + + std::vector weights = + Ioss::line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + region.get_database()->progress("generate_element_weights"); + + double start = Ioss::Utils::timer(); + std::vector elem_to_proc; + decompose_zoltan(region, num_ranks, method, elem_to_proc, weights, 
dummy); + double end = Ioss::Utils::timer(); + fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); + region.get_database()->progress("exit decompose_elements"); + + // Make sure all elements on a chain are on the same processor rank... + Ioss::line_decomp_modify(element_chains, elem_to_proc, num_ranks); + + return 1; +} + +template +std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) +{ + int debug_level = 0; + auto chains = string_chains(element_chains); + + if ((debug_level & 16) != 0) { + for (const auto &[chain_root, chain_elements] : chains) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + } + + std::vector weights(element_count, 1); + // Now, for each chain... + for (const auto &[chain_root, chain_elements] : chains) { + // * Set the weights of all elements in the chain... + // * non-root = 0, root = length of chain. + for (const auto &element : chain_elements) { + weights[element - 1] = 0; + } + weights[chain_root - 1] = static_cast(chain_elements.size()); + } + return weights; +} +template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); +template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); + + +template +void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, + int proc_count) +{ + int debug_level = 0; + // Get a map of all chains and the elements in the chains. Map key will be root. + auto chains = string_chains(element_chains); + + // Delta: elements added/removed from each processor... + std::vector delta(proc_count); + + // Now, for each chain... 
+ for (const auto &[chain_root, chain_elements] : chains) { + if ((debug_level & 16) != 0) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + + std::vector chain_proc_count(proc_count); + + // * get processors used by elements in the chain... + for (const auto &element : chain_elements) { + auto proc = elem_to_proc[element - 1]; + chain_proc_count[proc]++; + } + + // * Now, subtract the `delta` from each count + for (int i = 0; i < proc_count; i++) { + chain_proc_count[i] -= delta[i]; + } + + // * Assign all elements in the chain to processor at chain root + // * Update the deltas for all processors that gain/lose elements... + auto root_proc = elem_to_proc[chain_root - 1]; + for (const auto &element : chain_elements) { + if (elem_to_proc[element - 1] != root_proc) { + auto old_proc = elem_to_proc[element - 1]; + elem_to_proc[element - 1] = root_proc; + delta[root_proc]++; + delta[old_proc]--; + } + } + } + + std::vector proc_element_count(proc_count); + for (auto proc : elem_to_proc) { + proc_element_count[proc]++; + } + if ((debug_level & 32) != 0) { + fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); + fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); + } +} + + +template void line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); +template void line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); +} diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h new file mode 100644 index 0000000000..314a3d2a77 --- /dev/null +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -0,0 +1,33 @@ +/* + * Copyright(C) 2024 National Technology & Engineering Solutions + * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with + * NTESS, the U.S. 
Government retains certain rights in this software. + * + * See packages/seacas/LICENSE for details + */ +#pragma once + +#include "ioss_export.h" + +#include "Ioss_ChainGenerator.h" +#include "Ioss_Region.h" +#include +#include + +namespace Ioss { + template + void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, + int proc_count); + + template + void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, + size_t number_elements); + + template + std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); + + IOSS_EXPORT int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc); +} // namespace Ioss diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 83b0bb207a..5663574f60 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -6,13 +6,13 @@ #include "Ioss_CodeTypes.h" #include "exodus/Ioex_DecompositionData.h" +#include "Ioss_DecompositionUtils.h" + #if defined PARALLEL_AWARE_EXODUS -#include "Ioss_ChainGenerator.h" #include "Ioss_ElementTopology.h" #include "Ioss_Field.h" #include "Ioss_IOFactory.h" #include "Ioss_Map.h" -#include "Ioss_NodeBlock.h" #include "Ioss_PropertyManager.h" #include "Ioss_Region.h" #include "Ioss_SmartAssert.h" @@ -117,349 +117,6 @@ namespace { } #endif -template -std::map> string_chains(const Ioss::chain_t &element_chains) -{ - std::map> chains; - - for (size_t i = 0; i < element_chains.size(); i++) { - auto &chain_entry = element_chains[i]; - if (chain_entry.link >= 0) { - chains[chain_entry.element].push_back(i + 1); - } - } - return chains; -} - -template -std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) -{ - int 
debug_level = 0; - auto chains = string_chains(element_chains); - - if ((debug_level & 16) != 0) { - for (const auto &[chain_root, chain_elements] : chains) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - } - - std::vector weights(element_count, 1); - // Now, for each chain... - for (const auto &[chain_root, chain_elements] : chains) { - // * Set the weights of all elements in the chain... - // * non-root = 0, root = length of chain. - for (const auto &element : chain_elements) { - weights[element - 1] = 0; - } - weights[chain_root - 1] = static_cast(chain_elements.size()); - } - return weights; -} -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); - - -template -void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count) -{ - int debug_level = 0; - // Get a map of all chains and the elements in the chains. Map key will be root. - auto chains = string_chains(element_chains); - - // Delta: elements added/removed from each processor... - std::vector delta(proc_count); - - // Now, for each chain... - for (const auto &[chain_root, chain_elements] : chains) { - if ((debug_level & 16) != 0) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - - std::vector chain_proc_count(proc_count); - - // * get processors used by elements in the chain... - for (const auto &element : chain_elements) { - auto proc = elem_to_proc[element - 1]; - chain_proc_count[proc]++; - } - - // * Now, subtract the `delta` from each count - for (int i = 0; i < proc_count; i++) { - chain_proc_count[i] -= delta[i]; - } - - // * Assign all elements in the chain to processor at chain root - // * Update the deltas for all processors that gain/lose elements... 
- auto root_proc = elem_to_proc[chain_root - 1]; - for (const auto &element : chain_elements) { - if (elem_to_proc[element - 1] != root_proc) { - auto old_proc = elem_to_proc[element - 1]; - elem_to_proc[element - 1] = root_proc; - delta[root_proc]++; - delta[old_proc]--; - } - } - } - - std::vector proc_element_count(proc_count); - for (auto proc : elem_to_proc) { - proc_element_count[proc]++; - } - if ((debug_level & 32) != 0) { - fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); - fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); - } -} - - template - std::tuple, std::vector, std::vector> - get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) - { - size_t element_count = region.get_property("element_count").get_int(); - - // The zoltan methods supported in slice are all geometry based - // and use the element centroid. - std::vector x(element_count); - std::vector y(element_count); - std::vector z(element_count); - - const auto *nb = region.get_node_blocks()[0]; - std::vector coor; - nb->get_field_data("mesh_model_coordinates", coor); - - const auto &blocks = region.get_element_blocks(); - size_t el = 0; - for (auto &eb : blocks) { - std::vector connectivity; - eb->get_field_data("connectivity_raw", connectivity); - size_t blk_element_count = eb->entity_count(); - size_t blk_element_nodes = eb->topology()->number_nodes(); - - for (size_t j = 0; j < blk_element_count; j++) { - for (size_t k = 0; k < blk_element_nodes; k++) { - auto node = connectivity[j * blk_element_nodes + k] - 1; - x[el] += coor[node * 3 + 0]; - y[el] += coor[node * 3 + 1]; - z[el] += coor[node * 3 + 2]; - } - x[el] /= blk_element_nodes; - y[el] /= blk_element_nodes; - z[el] /= blk_element_nodes; - el++; - } - } - return {x, y, z}; - } - /*****************************************************************************/ - /***** Global data structure used by Zoltan callbacks. 
*****/ - /***** Could implement Zoltan callbacks without global data structure, *****/ - /***** but using the global data structure makes implementation quick. *****/ - struct - { - size_t ndot; /* Length of x, y, z, and part (== # of elements) */ - float *vwgt; /* vertex weights */ - double *x; /* x-coordinates */ - double *y; /* y-coordinates */ - double *z; /* z-coordinates */ - } Zoltan_Data; - - /*****************************************************************************/ - /***** ZOLTAN CALLBACK FUNCTIONS *****/ - int zoltan_num_dim_ser(void * /*data*/, int *ierr) - { - /* Return dimensionality of coordinate data. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - *ierr = ZOLTAN_OK; - if (Zoltan_Data.z != nullptr) { - return 3; - } - if (Zoltan_Data.y != nullptr) { - return 2; - } - return 1; - } - - int zoltan_num_obj_ser(void * /*data*/, int *ierr) - { - /* Return number of objects. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - *ierr = ZOLTAN_OK; - return Zoltan_Data.ndot; - } - - void zoltan_obj_list_ser(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids, - ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr) - { - /* Return list of object IDs. - * Return only global IDs; don't need local IDs since running in serial. - * gids are array indices for coordinate and vwgts arrays. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - std::iota(gids, gids + Zoltan_Data.ndot, 0); - if (wdim != 0) { - for (size_t i = 0; i < Zoltan_Data.ndot; i++) { - wgts[i] = static_cast(Zoltan_Data.vwgt[i]); - } - } - - *ierr = ZOLTAN_OK; - } - - void zoltan_geom_ser(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj, - const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom, - int *ierr) - { - /* Return coordinates for objects. - * gids are array indices for coordinate arrays. 
- * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - - for (size_t i = 0; i < static_cast(nobj); i++) { - size_t j = gids[i]; - geom[i * ndim] = Zoltan_Data.x[j]; - if (ndim > 1) { - geom[i * ndim + 1] = Zoltan_Data.y[j]; - } - if (ndim > 2) { - geom[i * ndim + 2] = Zoltan_Data.z[j]; - } - } - - *ierr = ZOLTAN_OK; - } - - template - void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, - std::vector &elem_to_proc, const std::vector &weights, - IOSS_MAYBE_UNUSED INT dummy) - { - if (ranks == 1) { - return; - } - - size_t element_count = region.get_property("element_count").get_int(); - if (element_count != static_cast(static_cast(element_count))) { - fmt::print(stderr, "ERROR: Cannot have a mesh with more than 2.1 Billion elements in a " - "Zoltan decomposition.\n"); - exit(EXIT_FAILURE); - } - - auto [x, y, z] = get_element_centroid(region, dummy); - - // Copy mesh data and pointers into structure accessible from callback fns. - Zoltan_Data.ndot = element_count; - Zoltan_Data.vwgt = const_cast(Data(weights)); - - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(y); - Zoltan_Data.z = Data(z); - - // Initialize Zoltan - int argc = 0; - char **argv = nullptr; - - float ver = 0.0; - Zoltan_Initialize(argc, argv, &ver); - fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), - method); - - Zoltan zz(Ioss::ParallelUtils::comm_self()); - - // Register Callback functions - // Using global Zoltan_Data; could register it here instead as data field. 
- zz.Set_Num_Obj_Fn(zoltan_num_obj_ser, nullptr); - zz.Set_Obj_List_Fn(zoltan_obj_list_ser, nullptr); - zz.Set_Num_Geom_Fn(zoltan_num_dim_ser, nullptr); - zz.Set_Geom_Multi_Fn(zoltan_geom_ser, nullptr); - - // Set parameters for Zoltan - zz.Set_Param("DEBUG_LEVEL", "0"); - std::string str = fmt::format("{}", ranks); - zz.Set_Param("NUM_GLOBAL_PARTS", str); - zz.Set_Param("OBJ_WEIGHT_DIM", "1"); - zz.Set_Param("LB_METHOD", method); - zz.Set_Param("NUM_LID_ENTRIES", "0"); - zz.Set_Param("REMAP", "0"); - zz.Set_Param("RETURN_LISTS", "PARTITION_ASSIGNMENTS"); - zz.Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); - - int num_global = sizeof(INT) / sizeof(ZOLTAN_ID_TYPE); - num_global = num_global < 1 ? 1 : num_global; - - // Call partitioner - int changes = 0; - int num_local = 0; - int num_import = 1; - int num_export = 1; - ZOLTAN_ID_PTR import_global_ids = nullptr; - ZOLTAN_ID_PTR import_local_ids = nullptr; - ZOLTAN_ID_PTR export_global_ids = nullptr; - ZOLTAN_ID_PTR export_local_ids = nullptr; - int *import_procs = nullptr; - int *import_to_part = nullptr; - int *export_procs = nullptr; - int *export_to_part = nullptr; - int rc = zz.LB_Partition(changes, num_global, num_local, num_import, import_global_ids, - import_local_ids, import_procs, import_to_part, num_export, - export_global_ids, export_local_ids, export_procs, export_to_part); - - if (rc != ZOLTAN_OK) { - fmt::print(stderr, "ERROR: Problem during call to Zoltan LB_Partition.\n"); - goto End; - } - - // Sanity check - if (element_count != static_cast(num_export)) { - fmt::print(stderr, "Sanity check failed; ndot {} != num_export {}.\n", element_count, - static_cast(num_export)); - goto End; - } - - elem_to_proc.resize(element_count); - for (size_t i = 0; i < element_count; i++) { - elem_to_proc[i] = export_to_part[i]; - } - - End: - /* Clean up */ - Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); - Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, 
&export_procs, &export_to_part); - } - -template void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); -template void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); - - int line_decompose(Ioss::Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector element_to_proc) - { - - int dummy = 0; - Ioss::chain_t element_chains = - Ioss::generate_element_chains(region, surface_list, 0, dummy); - region.get_database()->progress("Ioss::generate_element_chains"); - - std::vector weights = - line_decomp_weights(element_chains, region.get_property("element_count").get_int()); - region.get_database()->progress("generate_element_weights"); - - double start = Ioss::Utils::timer(); - std::vector elem_to_proc; - decompose_zoltan(region, num_ranks, method, elem_to_proc, weights, dummy); - double end = Ioss::Utils::timer(); - fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); - region.get_database()->progress("exit decompose_elements"); - - // Make sure all elements on a chain are on the same processor rank... 
- line_decomp_modify(element_chains, elem_to_proc, num_ranks); - - return 1; - } } // namespace namespace Ioex { @@ -620,7 +277,7 @@ namespace Ioex { Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); Ioss::Region region(dbi, "line_decomp_region"); - int status = line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global); + int status = Ioss::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global); } // Now broadcast the parts of the `element_to_proc_global` From 6a4ce2853884e4a5f335fd9e0039264e4ead712b Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 14:55:02 -0600 Subject: [PATCH 07/33] IOSS: Add some missing includes --- .../libraries/ioss/src/Ioss_DecompositionUtils.C | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 2a4f7034b9..8adae559cf 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -8,6 +8,8 @@ #include #include +#include +#include #include "Ioss_DecompositionUtils.h" #include "Ioss_CodeTypes.h" @@ -19,6 +21,17 @@ #include "Ioss_ChainGenerator.h" +#include +#include + +#if !defined __NVCC__ +#include +#endif +#include +#include +#include +#include + #if !defined(NO_ZOLTAN_SUPPORT) #include // for Zoltan_Initialize #include // for Zoltan From 880649c5eb123379ce8c2593a399444deed85ade Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 16:46:37 -0600 Subject: [PATCH 08/33] SLICE: Fix data_storage type --- packages/seacas/applications/slice/Slice.C | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/seacas/applications/slice/Slice.C 
b/packages/seacas/applications/slice/Slice.C index cbd1961c7b..caf5b51341 100644 --- a/packages/seacas/applications/slice/Slice.C +++ b/packages/seacas/applications/slice/Slice.C @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1374,7 +1375,7 @@ namespace { Ioss::PropertyManager properties = set_properties(interFace); Ioss::chain_t element_chains; - std::vector weights; + std::vector weights; if (interFace.lineDecomp_) { element_chains = Ioss::generate_element_chains(region, interFace.lineSurfaceList_, debug_level, dummy); @@ -1383,7 +1384,7 @@ namespace { if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || interFace.decomposition_method() == "hsfc") { weights = - line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + Ioss::DecompUtils::line_decomp_weights(element_chains, region.get_property("element_count").get_int()); progress("generate_element_weights"); } } @@ -1400,11 +1401,11 @@ namespace { if (interFace.lineDecomp_) { // Make sure all elements on a chain are on the same processor rank... 
- line_decomp_modify(element_chains, elem_to_proc, interFace.processor_count()); + Ioss::DecompUtils::line_decomp_modify(element_chains, elem_to_proc, interFace.processor_count()); } if (debug_level & 32) { - output_decomposition_statistics(elem_to_proc, interFace.processor_count(), + Ioss::DecompUtils::output_decomposition_statistics(elem_to_proc, interFace.processor_count(), elem_to_proc.size()); } @@ -1428,7 +1429,7 @@ namespace { Ioss::MeshCopyOptions options{}; options.ints_64_bit = sizeof(INT) == 64; options.delete_timesteps = true; - options.data_storage_type = 2; + options.data_storage_type = 1; options.verbose = true; // Copy mesh portion of input region to the output region From 45f75b27a98f54417364fff18772fae884f99e7f Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 16:47:38 -0600 Subject: [PATCH 09/33] Share decomp code between slice and ioss line decomp --- .../seacas/applications/slice/SL_Decompose.C | 241 +----------------- .../seacas/applications/slice/SL_Decompose.h | 16 +- .../ioss/src/Ioss_DecompositionUtils.C | 157 ++++++++++-- .../ioss/src/Ioss_DecompositionUtils.h | 28 +- .../ioss/src/exodus/Ioex_DecompositionData.C | 2 +- 5 files changed, 167 insertions(+), 277 deletions(-) diff --git a/packages/seacas/applications/slice/SL_Decompose.C b/packages/seacas/applications/slice/SL_Decompose.C index cb27614147..cc1bce0db2 100644 --- a/packages/seacas/applications/slice/SL_Decompose.C +++ b/packages/seacas/applications/slice/SL_Decompose.C @@ -173,7 +173,7 @@ namespace { struct { size_t ndot; /* Length of x, y, z, and part (== # of elements) */ - int *vwgt; /* vertex weights */ + float *vwgt; /* vertex weights */ double *x; /* x-coordinates */ double *y; /* y-coordinates */ double *z; /* z-coordinates */ @@ -248,7 +248,7 @@ namespace { template void decompose_zoltan(const Ioss::Region ®ion, int ranks, SystemInterface &interFace, - std::vector &elem_to_proc, const std::vector &weights, + std::vector &elem_to_proc, const std::vector 
&weights, IOSS_MAYBE_UNUSED INT dummy) { if (ranks == 1) { @@ -266,7 +266,7 @@ namespace { // Copy mesh data and pointers into structure accessible from callback fns. Zoltan_Data.ndot = element_count; - Zoltan_Data.vwgt = const_cast(Data(weights)); + Zoltan_Data.vwgt = const_cast(Data(weights)); if (interFace.ignore_x_ && interFace.ignore_y_) { Zoltan_Data.x = Data(z); @@ -464,64 +464,6 @@ namespace { } #endif - void output_histogram(const std::vector &proc_work, size_t avg_work, size_t median) - { - fmt::print("Work-per-processor Histogram\n"); - std::array histogram{}; - - auto wmin = *std::min_element(proc_work.begin(), proc_work.end()); - auto wmax = *std::max_element(proc_work.begin(), proc_work.end()); - - size_t hist_size = std::min(size_t(16), (wmax - wmin)); - hist_size = std::min(hist_size, proc_work.size()); - - if (hist_size <= 1) { - fmt::print("\tWork is the same on all processors; no histogram needed.\n\n"); - return; - } - - auto delta = double(wmax + 1 - wmin) / hist_size; - for (const auto &pw : proc_work) { - auto bin = size_t(double(pw - wmin) / delta); - SMART_ASSERT(bin < hist_size)(bin)(hist_size); - histogram[bin]++; - } - - size_t proc_width = Ioss::Utils::number_width(proc_work.size(), true); - size_t work_width = Ioss::Utils::number_width(wmax, true); - - fmt::print("\n\t{:^{}} {:^{}}\n", "Work Range", 2 * work_width + 2, "#", proc_width); - auto hist_max = *std::max_element(histogram.begin(), histogram.end()); - for (size_t i = 0; i < hist_size; i++) { - int max_star = 50; - int star_cnt = ((double)histogram[i] / hist_max * max_star); - std::string stars(star_cnt, '*'); - for (int j = 9; j < star_cnt;) { - stars[j] = '|'; - j += 10; - } - if (histogram[i] > 0 && star_cnt == 0) { - stars = '.'; - } - size_t w1 = wmin + size_t(i * delta); - size_t w2 = wmin + size_t((i + 1) * delta); - std::string postfix; - if (w1 <= avg_work && avg_work < w2) { - postfix += "average"; - } - if (w1 <= median && median < w2) { - if (!postfix.empty()) { - 
postfix += ", "; - } - postfix += "median"; - } - fmt::print("\t{:{}}..{:{}} ({:{}}):\t{:{}} {}\n", fmt::group_digits(w1), work_width, - fmt::group_digits(w2), work_width, fmt::group_digits(histogram[i]), proc_width, - stars, max_star, postfix); - } - fmt::print("\n"); - } - void scale_decomp(std::vector &elem_to_proc, int iscale, size_t num_proc) { // Do the scaling (integer division...) @@ -576,15 +518,15 @@ namespace { } // namespace template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, + const std::vector &weights, IOSS_MAYBE_UNUSED int dummy); template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, + const std::vector &weights, IOSS_MAYBE_UNUSED int64_t dummy); template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy) + const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy) { progress(__func__); // Populate the 'elem_to_proc' vector with a mapping from element to processor. 
@@ -824,174 +766,3 @@ std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface assert(elem_to_proc.size() == element_count); return elem_to_proc; } - -template -std::map> string_chains(const Ioss::chain_t &element_chains) -{ - std::map> chains; - - for (size_t i = 0; i < element_chains.size(); i++) { - auto &chain_entry = element_chains[i]; - if (chain_entry.link >= 0) { - chains[chain_entry.element].push_back(i + 1); - } - } - return chains; -} - -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); - -template -std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) -{ - auto chains = string_chains(element_chains); - - if ((debug_level & 16) != 0) { - for (const auto &[chain_root, chain_elements] : chains) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - } - - std::vector weights(element_count, 1); - // Now, for each chain... - for (const auto &[chain_root, chain_elements] : chains) { - // * Set the weights of all elements in the chain... - // * non-root = 0, root = length of chain. - for (const auto &element : chain_elements) { - weights[element - 1] = 0; - } - weights[chain_root - 1] = static_cast(chain_elements.size()); - } - return weights; -} - -template void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); -template void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); - -template -void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count) -{ - // Get a map of all chains and the elements in the chains. Map key will be root. - auto chains = string_chains(element_chains); - - // Delta: elements added/removed from each processor... 
- std::vector delta(proc_count); - - // Now, for each chain... - for (const auto &[chain_root, chain_elements] : chains) { - if ((debug_level & 16) != 0) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - - std::vector chain_proc_count(proc_count); - - // * get processors used by elements in the chain... - for (const auto &element : chain_elements) { - auto proc = elem_to_proc[element - 1]; - chain_proc_count[proc]++; - } - - // * Now, subtract the `delta` from each count - for (int i = 0; i < proc_count; i++) { - chain_proc_count[i] -= delta[i]; - } - - // * Assign all elements in the chain to processor at chain root - // * Update the deltas for all processors that gain/lose elements... - auto root_proc = elem_to_proc[chain_root - 1]; - for (const auto &element : chain_elements) { - if (elem_to_proc[element - 1] != root_proc) { - auto old_proc = elem_to_proc[element - 1]; - elem_to_proc[element - 1] = root_proc; - delta[root_proc]++; - delta[old_proc]--; - } - } - } - - std::vector proc_element_count(proc_count); - for (auto proc : elem_to_proc) { - proc_element_count[proc]++; - } - if ((debug_level & 32) != 0) { - fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); - fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); - } -} - -template void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements); -template void output_decomposition_statistics(const std::vector &elem_to_proc, - int proc_count, size_t number_elements); -template -void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements) -{ - // Output histogram of elements / rank... 
- std::vector elem_per_rank(proc_count); - for (INT proc : elem_to_proc) { - elem_per_rank[proc]++; - } - - size_t proc_width = Ioss::Utils::number_width(proc_count, false); - size_t work_width = Ioss::Utils::number_width(number_elements, true); - - auto min_work = *std::min_element(elem_per_rank.begin(), elem_per_rank.end()); - auto max_work = *std::max_element(elem_per_rank.begin(), elem_per_rank.end()); - size_t median = 0; - { - auto pw_copy(elem_per_rank); - std::nth_element(pw_copy.begin(), pw_copy.begin() + pw_copy.size() / 2, pw_copy.end()); - median = pw_copy[pw_copy.size() / 2]; - fmt::print("\nElements per processor:\n\tMinimum = {}, Maximum = {}, Median = {}, Ratio = " - "{:.3}\n\n", - fmt::group_digits(min_work), fmt::group_digits(max_work), fmt::group_digits(median), - (double)(max_work) / min_work); - } - if (min_work == max_work) { - fmt::print("\nWork on all processors is {}\n\n", fmt::group_digits(min_work)); - } - else { - int max_star = 40; - int min_star = max_star * ((double)min_work / (double)(max_work)); - min_star = std::max(1, min_star); - int delta = max_star - min_star; - - double avg_work = (double)number_elements / (double)proc_count; - for (size_t i = 0; i < elem_per_rank.size(); i++) { - int star_cnt = - (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; - std::string stars(star_cnt, '*'); - std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; - if (elem_per_rank[i] == max_work) { - fmt::print( -#if !defined __NVCC__ - fg(fmt::color::red), -#endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - (double)elem_per_rank[i] / avg_work, stars); - } - else if (elem_per_rank[i] == min_work) { - fmt::print( -#if !defined __NVCC__ - fg(fmt::color::green), -#endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - elem_per_rank[i] / avg_work, stars); - } - else { - fmt::print(format, i, proc_width, fmt::group_digits(elem_per_rank[i]), 
work_width, - elem_per_rank[i] / avg_work, stars); - } - } - - // Output Histogram... - output_histogram(elem_per_rank, (size_t)avg_work, median); - } -} diff --git a/packages/seacas/applications/slice/SL_Decompose.h b/packages/seacas/applications/slice/SL_Decompose.h index f2e49c6361..8310570193 100644 --- a/packages/seacas/applications/slice/SL_Decompose.h +++ b/packages/seacas/applications/slice/SL_Decompose.h @@ -1,4 +1,4 @@ -// Copyright(C) 1999-2023 National Technology & Engineering Solutions +// Copyright(C) 1999-2024 National Technology & Engineering Solutions // of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with // NTESS, the U.S. Government retains certain rights in this software. // @@ -12,16 +12,4 @@ #pragma once template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy); - -template -void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count); - -template -void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements); - -template -std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); + const std::vector &weights, IOSS_MAYBE_UNUSED INT dummy); diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 8adae559cf..bbed0a98dc 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -18,19 +18,17 @@ #include "Ioss_NodeBlock.h" #include "Ioss_ElementBlock.h" #include "Ioss_Region.h" - -#include "Ioss_ChainGenerator.h" +#include "Ioss_SmartAssert.h" #include #include -#if !defined __NVCC__ -#include -#endif #include #include #include -#include +#if !defined __NVCC__ +#include +#endif #if !defined(NO_ZOLTAN_SUPPORT) #include // for 
Zoltan_Initialize @@ -174,6 +172,64 @@ std::map> string_chains(const Ioss::chain_t &element_ return {x, y, z}; } + void output_histogram(const std::vector &proc_work, size_t avg_work, size_t median) + { + fmt::print("Work-per-processor Histogram\n"); + std::array histogram{}; + + auto wmin = *std::min_element(proc_work.begin(), proc_work.end()); + auto wmax = *std::max_element(proc_work.begin(), proc_work.end()); + + size_t hist_size = std::min(size_t(16), (wmax - wmin)); + hist_size = std::min(hist_size, proc_work.size()); + + if (hist_size <= 1) { + fmt::print("\tWork is the same on all processors; no histogram needed.\n\n"); + return; + } + + auto delta = double(wmax + 1 - wmin) / hist_size; + for (const auto &pw : proc_work) { + auto bin = size_t(double(pw - wmin) / delta); + SMART_ASSERT(bin < hist_size)(bin)(hist_size); + histogram[bin]++; + } + + size_t proc_width = Ioss::Utils::number_width(proc_work.size(), true); + size_t work_width = Ioss::Utils::number_width(wmax, true); + + fmt::print("\n\t{:^{}} {:^{}}\n", "Work Range", 2 * work_width + 2, "#", proc_width); + auto hist_max = *std::max_element(histogram.begin(), histogram.end()); + for (size_t i = 0; i < hist_size; i++) { + int max_star = 50; + int star_cnt = ((double)histogram[i] / hist_max * max_star); + std::string stars(star_cnt, '*'); + for (int j = 9; j < star_cnt;) { + stars[j] = '|'; + j += 10; + } + if (histogram[i] > 0 && star_cnt == 0) { + stars = '.'; + } + size_t w1 = wmin + size_t(i * delta); + size_t w2 = wmin + size_t((i + 1) * delta); + std::string postfix; + if (w1 <= avg_work && avg_work < w2) { + postfix += "average"; + } + if (w1 <= median && median < w2) { + if (!postfix.empty()) { + postfix += ", "; + } + postfix += "median"; + } + fmt::print("\t{:{}}..{:{}} ({:{}}):\t{:{}} {}\n", fmt::group_digits(w1), work_width, + fmt::group_digits(w2), work_width, fmt::group_digits(histogram[i]), proc_width, + stars, max_star, postfix); + } + fmt::print("\n"); + } + template void 
decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, std::vector &elem_to_proc, const std::vector &weights, @@ -274,7 +330,7 @@ std::map> string_chains(const Ioss::chain_t &element_ } namespace Ioss { -int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc) + int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc) { int dummy = 0; Ioss::chain_t element_chains = @@ -282,7 +338,7 @@ int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, region.get_database()->progress("Ioss::generate_element_chains"); std::vector weights = - Ioss::line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + line_decomp_weights(element_chains, region.get_property("element_count").get_int()); region.get_database()->progress("generate_element_weights"); double start = Ioss::Utils::timer(); @@ -293,13 +349,13 @@ int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, region.get_database()->progress("exit decompose_elements"); // Make sure all elements on a chain are on the same processor rank... 
- Ioss::line_decomp_modify(element_chains, elem_to_proc, num_ranks); + line_decomp_modify(element_chains, elem_to_proc, num_ranks); return 1; } template -std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) +std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) { int debug_level = 0; auto chains = string_chains(element_chains); @@ -322,14 +378,14 @@ std::vector line_decomp_weights(const Ioss::chain_t &element_chains, } return weights; } -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, +template std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); -template std::vector line_decomp_weights(const Ioss::chain_t &element_chains, +template std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); template -void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, +void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count) { int debug_level = 0; @@ -382,8 +438,79 @@ void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &element_chains, +template void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count); -template void line_decomp_modify(const Ioss::chain_t &element_chains, +template void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count); + +template +void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, + size_t number_elements) +{ + // Output histogram of elements / rank... 
+ std::vector elem_per_rank(proc_count); + for (INT proc : elem_to_proc) { + elem_per_rank[proc]++; + } + + size_t proc_width = Ioss::Utils::number_width(proc_count, false); + size_t work_width = Ioss::Utils::number_width(number_elements, true); + + auto min_work = *std::min_element(elem_per_rank.begin(), elem_per_rank.end()); + auto max_work = *std::max_element(elem_per_rank.begin(), elem_per_rank.end()); + size_t median = 0; + { + auto pw_copy(elem_per_rank); + std::nth_element(pw_copy.begin(), pw_copy.begin() + pw_copy.size() / 2, pw_copy.end()); + median = pw_copy[pw_copy.size() / 2]; + fmt::print("\nElements per processor:\n\tMinimum = {}, Maximum = {}, Median = {}, Ratio = " + "{:.3}\n\n", + fmt::group_digits(min_work), fmt::group_digits(max_work), fmt::group_digits(median), + (double)(max_work) / min_work); + } + if (min_work == max_work) { + fmt::print("\nWork on all processors is {}\n\n", fmt::group_digits(min_work)); + } + else { + int max_star = 40; + int min_star = max_star * ((double)min_work / (double)(max_work)); + min_star = std::max(1, min_star); + int delta = max_star - min_star; + + double avg_work = (double)number_elements / (double)proc_count; + for (size_t i = 0; i < elem_per_rank.size(); i++) { + int star_cnt = + (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; + std::string stars(star_cnt, '*'); + std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; + if (elem_per_rank[i] == max_work) { + fmt::print( +#if !defined __NVCC__ + fg(fmt::color::red), +#endif + format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + (double)elem_per_rank[i] / avg_work, stars); + } + else if (elem_per_rank[i] == min_work) { + fmt::print( +#if !defined __NVCC__ + fg(fmt::color::green), +#endif + format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + elem_per_rank[i] / avg_work, stars); + } + else { + fmt::print(format, i, proc_width, fmt::group_digits(elem_per_rank[i]), 
work_width, + elem_per_rank[i] / avg_work, stars); + } + } + + // Output Histogram... + output_histogram(elem_per_rank, (size_t)avg_work, median); + } +} +template void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, + size_t number_elements); +template void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, + int proc_count, size_t number_elements); } diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 314a3d2a77..6f80105774 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -10,24 +10,28 @@ #include "ioss_export.h" #include "Ioss_ChainGenerator.h" +#include "Ioss_CodeTypes.h" #include "Ioss_Region.h" #include #include namespace Ioss { - template - void line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count); + class IOSS_EXPORT DecompUtils + { + public: + template + static void line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); - template - void output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements); + template + static void output_decomposition_statistics(const std::vector &elem_to_proc, + int proc_count, size_t number_elements); - template - std::vector line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); + template + static std::vector line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); - IOSS_EXPORT int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, - const std::string &surface_list, - std::vector &element_to_proc); + static int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, + const std::string &surface_list, std::vector &element_to_proc); + }; } // 
namespace Ioss diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 5663574f60..d4d09afda5 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -277,7 +277,7 @@ namespace Ioex { Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); Ioss::Region region(dbi, "line_decomp_region"); - int status = Ioss::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global); + int status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global); } // Now broadcast the parts of the `element_to_proc_global` From b1a380fb5892eac4d41f3eb3cc4fdf59fd79937c Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 19:49:20 -0600 Subject: [PATCH 10/33] IOSS: Fix serial build --- packages/seacas/libraries/ioss/src/CMakeLists.txt | 2 ++ packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C | 1 + 2 files changed, 3 insertions(+) diff --git a/packages/seacas/libraries/ioss/src/CMakeLists.txt b/packages/seacas/libraries/ioss/src/CMakeLists.txt index ee0779d4fd..51e89b43f2 100644 --- a/packages/seacas/libraries/ioss/src/CMakeLists.txt +++ b/packages/seacas/libraries/ioss/src/CMakeLists.txt @@ -63,6 +63,8 @@ APPEND_GLOB(SOURCES ${DIR}/elements/*.C) IF (NOT TPL_ENABLE_MPI) LIST(REMOVE_ITEM SOURCES ${DIR}/Ioss_Decomposition.C) LIST(REMOVE_ITEM HEADERS ${DIR}/Ioss_Decomposition.h) + LIST(REMOVE_ITEM SOURCES ${DIR}/Ioss_DecompositionUtils.C) + LIST(REMOVE_ITEM HEADERS ${DIR}/Ioss_DecompositionUtils.h) ENDIF() TRIBITS_INCLUDE_DIRECTORIES( diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C 
b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index bbed0a98dc..1a6e8d50de 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -17,6 +17,7 @@ #include "Ioss_Decomposition.h" #include "Ioss_NodeBlock.h" #include "Ioss_ElementBlock.h" +#include "Ioss_ParallelUtils.h" #include "Ioss_Region.h" #include "Ioss_SmartAssert.h" From 0cb2faf17014178e0271eb27bc22d53c7c0b688c Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 6 Jun 2024 20:12:03 -0600 Subject: [PATCH 11/33] Pull element centroid into common utils class --- .../seacas/applications/slice/SL_Decompose.C | 42 +------- .../seacas/libraries/ioss/src/CMakeLists.txt | 2 - .../ioss/src/Ioss_DecompositionUtils.C | 97 ++++++++++--------- .../ioss/src/Ioss_DecompositionUtils.h | 4 + 4 files changed, 57 insertions(+), 88 deletions(-) diff --git a/packages/seacas/applications/slice/SL_Decompose.C b/packages/seacas/applications/slice/SL_Decompose.C index cc1bce0db2..16a43f4bd4 100644 --- a/packages/seacas/applications/slice/SL_Decompose.C +++ b/packages/seacas/applications/slice/SL_Decompose.C @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -127,45 +128,6 @@ namespace { } #if USE_ZOLTAN - template - std::tuple, std::vector, std::vector> - get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) - { - size_t element_count = region.get_property("element_count").get_int(); - - // The zoltan methods supported in slice are all geometry based - // and use the element centroid. 
- std::vector x(element_count); - std::vector y(element_count); - std::vector z(element_count); - - const auto *nb = region.get_node_blocks()[0]; - std::vector coor; - nb->get_field_data("mesh_model_coordinates", coor); - - const auto &blocks = region.get_element_blocks(); - size_t el = 0; - for (auto &eb : blocks) { - std::vector connectivity; - eb->get_field_data("connectivity_raw", connectivity); - size_t blk_element_count = eb->entity_count(); - size_t blk_element_nodes = eb->topology()->number_nodes(); - - for (size_t j = 0; j < blk_element_count; j++) { - for (size_t k = 0; k < blk_element_nodes; k++) { - auto node = connectivity[j * blk_element_nodes + k] - 1; - x[el] += coor[node * 3 + 0]; - y[el] += coor[node * 3 + 1]; - z[el] += coor[node * 3 + 2]; - } - x[el] /= blk_element_nodes; - y[el] /= blk_element_nodes; - z[el] /= blk_element_nodes; - el++; - } - } - return {x, y, z}; - } /*****************************************************************************/ /***** Global data structure used by Zoltan callbacks. *****/ /***** Could implement Zoltan callbacks without global data structure, *****/ @@ -262,7 +224,7 @@ namespace { exit(EXIT_FAILURE); } - auto [x, y, z] = get_element_centroid(region, dummy); + auto [x, y, z] = Ioss::DecompUtils::get_element_centroid(region, dummy); // Copy mesh data and pointers into structure accessible from callback fns. 
Zoltan_Data.ndot = element_count; diff --git a/packages/seacas/libraries/ioss/src/CMakeLists.txt b/packages/seacas/libraries/ioss/src/CMakeLists.txt index 51e89b43f2..ee0779d4fd 100644 --- a/packages/seacas/libraries/ioss/src/CMakeLists.txt +++ b/packages/seacas/libraries/ioss/src/CMakeLists.txt @@ -63,8 +63,6 @@ APPEND_GLOB(SOURCES ${DIR}/elements/*.C) IF (NOT TPL_ENABLE_MPI) LIST(REMOVE_ITEM SOURCES ${DIR}/Ioss_Decomposition.C) LIST(REMOVE_ITEM HEADERS ${DIR}/Ioss_Decomposition.h) - LIST(REMOVE_ITEM SOURCES ${DIR}/Ioss_DecompositionUtils.C) - LIST(REMOVE_ITEM HEADERS ${DIR}/Ioss_DecompositionUtils.h) ENDIF() TRIBITS_INCLUDE_DIRECTORIES( diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 1a6e8d50de..79e60d316f 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -11,19 +11,15 @@ #include #include -#include "Ioss_DecompositionUtils.h" #include "Ioss_CodeTypes.h" #include "Ioss_ChainGenerator.h" -#include "Ioss_Decomposition.h" +#include "Ioss_DecompositionUtils.h" #include "Ioss_NodeBlock.h" #include "Ioss_ElementBlock.h" #include "Ioss_ParallelUtils.h" #include "Ioss_Region.h" #include "Ioss_SmartAssert.h" -#include -#include - #include #include #include @@ -133,46 +129,6 @@ std::map> string_chains(const Ioss::chain_t &element_ return chains; } - template - std::tuple, std::vector, std::vector> - get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) - { - size_t element_count = region.get_property("element_count").get_int(); - - // The zoltan methods supported in slice are all geometry based - // and use the element centroid. 
- std::vector x(element_count); - std::vector y(element_count); - std::vector z(element_count); - - const auto *nb = region.get_node_blocks()[0]; - std::vector coor; - nb->get_field_data("mesh_model_coordinates", coor); - - const auto &blocks = region.get_element_blocks(); - size_t el = 0; - for (auto &eb : blocks) { - std::vector connectivity; - eb->get_field_data("connectivity_raw", connectivity); - size_t blk_element_count = eb->entity_count(); - size_t blk_element_nodes = eb->topology()->number_nodes(); - - for (size_t j = 0; j < blk_element_count; j++) { - for (size_t k = 0; k < blk_element_nodes; k++) { - auto node = connectivity[j * blk_element_nodes + k] - 1; - x[el] += coor[node * 3 + 0]; - y[el] += coor[node * 3 + 1]; - z[el] += coor[node * 3 + 2]; - } - x[el] /= blk_element_nodes; - y[el] /= blk_element_nodes; - z[el] /= blk_element_nodes; - el++; - } - } - return {x, y, z}; - } - void output_histogram(const std::vector &proc_work, size_t avg_work, size_t median) { fmt::print("Work-per-processor Histogram\n"); @@ -247,7 +203,7 @@ std::map> string_chains(const Ioss::chain_t &element_ exit(EXIT_FAILURE); } - auto [x, y, z] = get_element_centroid(region, dummy); + auto [x, y, z] = Ioss::DecompUtils::get_element_centroid(region, dummy); // Copy mesh data and pointers into structure accessible from callback fns. Zoltan_Data.ndot = element_count; @@ -514,4 +470,53 @@ template void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, size_t number_elements); + + template + std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) + { + size_t element_count = region.get_property("element_count").get_int(); + + // The zoltan methods supported in slice are all geometry based + // and use the element centroid. 
+ std::vector x(element_count); + std::vector y(element_count); + std::vector z(element_count); + + const auto *nb = region.get_node_blocks()[0]; + std::vector coor; + nb->get_field_data("mesh_model_coordinates", coor); + + const auto &blocks = region.get_element_blocks(); + size_t el = 0; + for (auto &eb : blocks) { + std::vector connectivity; + eb->get_field_data("connectivity_raw", connectivity); + size_t blk_element_count = eb->entity_count(); + size_t blk_element_nodes = eb->topology()->number_nodes(); + + for (size_t j = 0; j < blk_element_count; j++) { + for (size_t k = 0; k < blk_element_nodes; k++) { + auto node = connectivity[j * blk_element_nodes + k] - 1; + x[el] += coor[node * 3 + 0]; + y[el] += coor[node * 3 + 1]; + z[el] += coor[node * 3 + 2]; + } + x[el] /= blk_element_nodes; + y[el] /= blk_element_nodes; + z[el] /= blk_element_nodes; + el++; + } + } + return {x, y, z}; + } + + template + std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int dummy); + + template + std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int64_t dummy); + } diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 6f80105774..64f7549839 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -33,5 +33,9 @@ namespace Ioss { static int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc); + + template + static std::tuple, std::vector, std::vector> + get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy); }; } // namespace Ioss From 747c2a6be324353ca8c316c8c74c283fc4212df7 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Fri, 7 Jun 2024 07:29:49 -0600 Subject: [PATCH 
12/33] Templative line_decompose; pass correct vector --- .../libraries/ioss/src/Ioss_DecompositionUtils.C | 15 +++++++++------ .../libraries/ioss/src/Ioss_DecompositionUtils.h | 3 ++- .../ioss/src/exodus/Ioex_DecompositionData.C | 10 ++++++++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 79e60d316f..60bb034da6 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -287,10 +287,11 @@ std::map> string_chains(const Ioss::chain_t &element_ } namespace Ioss { - int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc) + template + int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, INT dummy) { - int dummy = 0; - Ioss::chain_t element_chains = + + Ioss::chain_t element_chains = Ioss::generate_element_chains(region, surface_list, 0, dummy); region.get_database()->progress("Ioss::generate_element_chains"); @@ -299,18 +300,20 @@ namespace Ioss { region.get_database()->progress("generate_element_weights"); double start = Ioss::Utils::timer(); - std::vector elem_to_proc; - decompose_zoltan(region, num_ranks, method, elem_to_proc, weights, dummy); + decompose_zoltan(region, num_ranks, method, element_to_proc, weights, dummy); double end = Ioss::Utils::timer(); fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); region.get_database()->progress("exit decompose_elements"); // Make sure all elements on a chain are on the same processor rank... 
- line_decomp_modify(element_chains, elem_to_proc, num_ranks); + line_decomp_modify(element_chains, element_to_proc, num_ranks); return 1; } + template int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int dummy); + template int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int64_t dummy); + template std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) { diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 64f7549839..92a0a6d9bd 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -31,8 +31,9 @@ namespace Ioss { static std::vector line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); + template static int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, - const std::string &surface_list, std::vector &element_to_proc); + const std::string &surface_list, std::vector &element_to_proc, INT dummy); template static std::tuple, std::vector, std::vector> diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index d4d09afda5..fefc47a56b 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -277,9 +277,15 @@ namespace Ioex { Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); Ioss::Region region(dbi, "line_decomp_region"); - int status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, 
m_decomposition.m_decompExtra, element_to_proc_global); - } + int status = 0; + if (dbi->int_byte_size_api() == 8) { + status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, int64_t(0)); + } + else { + status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, int(0)); + } + } // Now broadcast the parts of the `element_to_proc_global` // vector to the owning ranks in the initial linear // decomposition... From 53550984cf529c8da23e0e1a4381703ce656192a Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Fri, 7 Jun 2024 15:36:16 -0600 Subject: [PATCH 13/33] Handle specified in guided_decompose --- .../seacas/libraries/ioss/src/Ioss_Decomposition.C | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index 0760e990ff..8117089b8b 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -680,7 +680,7 @@ namespace Ioss { template void Decomposition::guided_decompose() { show_progress(__func__); - assert(m_method == "MAP" || m_method == "VARIABLE"); + assert(m_method == "MAP" || m_method == "VARIABLE" || m_method == "SPECIFIED"); // - Read my portion of the map / variable. // - count # of exports to each rank // -- exportElementCount[proc] @@ -689,13 +689,7 @@ namespace Ioss { // - communicate to all proc -- becomes importElementMap. 
// Create `exportElementIndex` from `exportElementCount` - std::string label; - if (m_method == "MAP") { - label = "map"; - } - else { - label = "variable"; - } + std::string label = m_method; // If the "m_decompExtra" string contains a comma, then the // value following the comma is either an integer "scale" @@ -713,7 +707,7 @@ namespace Ioss { // [0..m_processorCount). double scale = 1.0; auto pos = m_decompExtra.find(","); - if (pos != std::string::npos) { + if (m_method != "SPECIFIED" && pos != std::string::npos) { // Extract the string following the comma... auto scale_str = m_decompExtra.substr(pos + 1); if (scale_str == "AUTO" || scale_str == "auto") { From 0c0300852389c70ac7d732ed5a09b691b0a602c3 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 10:04:17 -0600 Subject: [PATCH 14/33] Remove unused function --- packages/seacas/libraries/ioss/src/Ioss_Decomposition.C | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index 8117089b8b..25f40e47c9 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -1087,15 +1087,6 @@ namespace Ioss { } #endif - template void Decomposition::line_decompose() - { - show_progress(__func__); - // Currently, we perform the line decomposition in serial on rank - // 0 and then broadcast the `elementToProc` map to each rank which - // then does a guided decompostion. 
- - } - #if !defined(NO_ZOLTAN_SUPPORT) template void Decomposition::zoltan_decompose(Zoltan &zz) From e939fcb44c31730622568bb76d140f8fba998d69 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 10:05:24 -0600 Subject: [PATCH 15/33] elementToProc does not need 64-bit range --- .../libraries/ioss/src/Ioss_Decomposition.h | 2 +- .../ioss/src/Ioss_DecompositionUtils.C | 9 ++----- .../ioss/src/Ioss_DecompositionUtils.h | 5 ++-- .../ioss/src/exodus/Ioex_DecompositionData.C | 24 +++++++++++-------- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h index f5a239b635..14ca00bd4e 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.h @@ -806,7 +806,7 @@ namespace Ioss { bool m_showProgress{false}; bool m_showHWM{false}; - std::vector m_elementToProc; // Used by "MAP" scheme... + std::vector m_elementToProc; // Used by "MAP" scheme... std::vector m_centroids; std::vector m_weights; std::vector m_pointer; // Index into adjacency, processor list for each element... diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 60bb034da6..851d9d4a76 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -403,13 +403,12 @@ template void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_ template void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count); -template -void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, +void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, size_t number_elements) { // Output histogram of elements / rank... 
std::vector elem_per_rank(proc_count); - for (INT proc : elem_to_proc) { + for (int proc : elem_to_proc) { elem_per_rank[proc]++; } @@ -469,10 +468,6 @@ void DecompUtils::output_decomposition_statistics(const std::vector &elem_t output_histogram(elem_per_rank, (size_t)avg_work, median); } } -template void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements); -template void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, - int proc_count, size_t number_elements); template std::tuple, std::vector, std::vector> diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 92a0a6d9bd..4e17a1ae5b 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -21,10 +21,9 @@ namespace Ioss { public: template static void line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); + std::vector &element_to_proc, int proc_count); - template - static void output_decomposition_statistics(const std::vector &elem_to_proc, + static void output_decomposition_statistics(const std::vector &element_to_proc, int proc_count, size_t number_elements); template diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index fefc47a56b..b7803370e4 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -201,8 +201,16 @@ namespace Ioex { for (int i = 0; i < map_count; i++) { if (std::string(names[i]) == map_name) { m_decomposition.m_elementToProc.resize(decomp_elem_count()); - ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, - decomp_elem_count(), 
Data(m_decomposition.m_elementToProc)); + if (sizeof(INT) == 4) { + ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, + decomp_elem_count(), Data(m_decomposition.m_elementToProc)); + } + else { + std::vector tmp_map(decomp_elem_count()); + ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, + decomp_elem_count(), Data(tmp_map)); + std::copy(tmp_map.begin(), tmp_map.end(), m_decomposition.m_elementToProc.begin()); + } map_read = true; break; } @@ -278,13 +286,7 @@ namespace Ioex { Ioss::ParallelUtils::comm_self(), properties); Ioss::Region region(dbi, "line_decomp_region"); - int status = 0; - if (dbi->int_byte_size_api() == 8) { - status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, int64_t(0)); - } - else { - status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, int(0)); - } + int status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, INT(0)); } // Now broadcast the parts of the `element_to_proc_global` // vector to the owning ranks in the initial linear @@ -306,7 +308,9 @@ namespace Ioex { displs[i] = sum; sum += sendcounts[i]; } - MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, Data(m_decomposition.m_elementToProc), decomp_elem_count(), Ioss::mpi_type(INT(0)), 0, m_decomposition.m_comm); + MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, + Data(m_decomposition.m_elementToProc), decomp_elem_count(), MPI_INT, 0, m_decomposition.m_comm); + m_decomposition.m_method = "SPECIFIED"; } From b8c16c2a84455baf5464abda92e6debeaca2214d Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 10:16:35 -0600 Subject: [PATCH 16/33] EXPLORE: Fix behavior after 
bad parse warning --- packages/seacas/applications/explore/exp_qainfo.blk | 5 +++-- packages/seacas/applications/explore/exp_rixid.f | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/seacas/applications/explore/exp_qainfo.blk b/packages/seacas/applications/explore/exp_qainfo.blk index df09d169cd..b43b841aa3 100644 --- a/packages/seacas/applications/explore/exp_qainfo.blk +++ b/packages/seacas/applications/explore/exp_qainfo.blk @@ -8,8 +8,8 @@ C See packages/seacas/LICENSE for details QAINFO(2) = ' ' QAINFO(3) = ' ' - QAINFO(2)(:8) = '20240418' - QAINFO(3)(:8) = ' 4.02' + QAINFO(2)(:8) = '20240610' + QAINFO(3)(:8) = ' 4.03' c..Dynamic dimensioning of block names+other changes c..compress output of distribution factors @@ -77,3 +77,4 @@ c..Add select nodes sset {id}... c..Fix memory overrun in check routines c..Call MDFREE() c..Refactor element select to allow add +c..Fix behavior after bad parse warning diff --git a/packages/seacas/applications/explore/exp_rixid.f b/packages/seacas/applications/explore/exp_rixid.f index c58e7e59a8..12e5a3e624 100644 --- a/packages/seacas/applications/explore/exp_rixid.f +++ b/packages/seacas/applications/explore/exp_rixid.f @@ -65,13 +65,13 @@ SUBROUTINE RIXID (INLINE, IFLD, INTYP, CFIELD, IFIELD, IF (FFMATC (IFLD, INTYP, CFIELD, 'ADD', 3)) THEN CALL FFADDC ('ADD', INLINE) ELSE + NUMSEL = 0 IF (.NOT. 
FFNUMB (IFLD, INTYP)) THEN ERRMSG = & 'Expected "OFF" or "ADD" or ' // SELMSG // ' range' CALL PRTERR ('CMDERR', ERRMSG(:LENSTR(ERRMSG))) GOTO 130 END IF - NUMSEL = 0 END IF 110 CONTINUE From 580821cba83fda4d98813247f383926bdbf85f52 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 10:38:34 -0600 Subject: [PATCH 17/33] EXPLORE: Better warning/info message on SELECT --- packages/seacas/applications/explore/exp_comand.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/seacas/applications/explore/exp_comand.f b/packages/seacas/applications/explore/exp_comand.f index 73479c3b08..92a73869d2 100644 --- a/packages/seacas/applications/explore/exp_comand.f +++ b/packages/seacas/applications/explore/exp_comand.f @@ -319,7 +319,8 @@ SUBROUTINE COMAND (A, IA, EXODUS, DBNAME, QAREC, INFO, IF (LISTYP .EQ. ' ') THEN CALL ABRSTR (LISTYP, WORD, SELTBL) IF (LISTYP .NE. ' ') THEN - CALL PRTERR ('CMDREQ', 'Please use the SELECT command') + CALL PRTERR ('CMDREQ', 'Please use the SELECT ' + $ // LISTYP(:LENSTR(LISTYP)) // ' command') VERB = 'SELECT' ELSE LISTYP = WORD From 6888eafe1be5ca92c53392a78967f7f43eafaa34 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 11:30:18 -0600 Subject: [PATCH 18/33] Minor rearrange include files --- .../seacas/libraries/ioss/src/Ioss_DecompositionUtils.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 4e17a1ae5b..9386bba275 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -7,14 +7,14 @@ */ #pragma once -#include "ioss_export.h" - #include "Ioss_ChainGenerator.h" #include "Ioss_CodeTypes.h" #include "Ioss_Region.h" #include #include +#include "ioss_export.h" + namespace Ioss { class IOSS_EXPORT DecompUtils { @@ -32,7 +32,8 @@ namespace Ioss { 
template static int line_decompose(Region ®ion, size_t num_ranks, const std::string &method, - const std::string &surface_list, std::vector &element_to_proc, INT dummy); + const std::string &surface_list, std::vector &element_to_proc, + INT dummy); template static std::tuple, std::vector, std::vector> From 4c6bfea8bb121a16ac7ce0afd9bc0d41b2348993 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 13:07:00 -0600 Subject: [PATCH 19/33] IOSS: See if this fixes/affects msys2 build --- packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 851d9d4a76..ff2cceeb8f 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -509,11 +509,11 @@ void DecompUtils::output_decomposition_statistics(const std::vector &elem_t return {x, y, z}; } - template +template IOSS_EXPORT std::tuple, std::vector, std::vector> DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int dummy); - template +template IOSS_EXPORT std::tuple, std::vector, std::vector> DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int64_t dummy); From 7a1bec91856bd71368a76ac1ee7844ee042ca6f9 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 13:30:21 -0600 Subject: [PATCH 20/33] IOSS: Add some logging/hwm code to line decomp --- packages/seacas/libraries/ioss/src/Ioss_Decomposition.C | 2 +- .../libraries/ioss/src/exodus/Ioex_DecompositionData.C | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index 25f40e47c9..6f2980fd1c 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ 
b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -462,7 +462,7 @@ namespace Ioss { if (m_method == "SPECIFIED") { // Currently used for line decomposition with another decomposition type. // The line-modified decomposition is done prior to this and builds the - // m_elementToProc which is then used here to decompose the elements... + // `m_elementToProc` which is then used here to decompose the elements... guided_decompose(); } diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index b7803370e4..39ac00bd91 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -280,13 +280,15 @@ namespace Ioex { // do do the parallel distributions/decomposition of the elements assuming a "guided" decomposition. std::vector element_to_proc_global{}; + m_decomposition.show_progress("\tline_decompose begin"); if (m_processor == 0) { Ioss::PropertyManager properties; Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); Ioss::Region region(dbi, "line_decomp_region"); - int status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, INT(0)); + int status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, + m_decomposition.m_decompExtra, element_to_proc_global, INT(0)); } // Now broadcast the parts of the `element_to_proc_global` // vector to the owning ranks in the initial linear @@ -311,6 +313,7 @@ namespace Ioex { MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, Data(m_decomposition.m_elementToProc), decomp_elem_count(), MPI_INT, 0, m_decomposition.m_comm); m_decomposition.m_method = "SPECIFIED"; + 
m_decomposition.show_progress("\tline_decompose end"); } From f087ab2e661a9fad476be55984dd5903b9efc305 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 14:03:47 -0600 Subject: [PATCH 21/33] IOSS: Another try to see how affects msys2 build --- .../libraries/ioss/src/Ioss_DecompositionUtils.C | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index ff2cceeb8f..0a4dad45ce 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -311,8 +311,8 @@ namespace Ioss { return 1; } - template int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int dummy); - template int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int64_t dummy); + template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int dummy); + template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int64_t dummy); template std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) @@ -338,9 +338,9 @@ std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &el } return weights; } -template std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, +template IOSS_EXPORT std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); -template std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t 
&element_chains, +template IOSS_EXPORT std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); @@ -398,9 +398,9 @@ void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, s } -template void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, +template IOSS_EXPORT void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count); -template void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, +template IOSS_EXPORT void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, int proc_count); void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, From 9d64d7e28d000c44e841aa941bc1d3d1ede2d19c Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 14:16:13 -0600 Subject: [PATCH 22/33] IOSS: Better hwm logging output --- .../seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 39ac00bd91..10ac7b9044 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -280,7 +280,7 @@ namespace Ioex { // do do the parallel distributions/decomposition of the elements assuming a "guided" decomposition. 
std::vector element_to_proc_global{}; - m_decomposition.show_progress("\tline_decompose begin"); + m_decomposition.show_progress("***LINE_DECOMPOSE BEGIN***"); if (m_processor == 0) { Ioss::PropertyManager properties; Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, @@ -313,7 +313,7 @@ namespace Ioex { MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, Data(m_decomposition.m_elementToProc), decomp_elem_count(), MPI_INT, 0, m_decomposition.m_comm); m_decomposition.m_method = "SPECIFIED"; - m_decomposition.show_progress("\tline_decompose end"); + m_decomposition.show_progress("***LINE_DECOMPOSE END***"); } From e71c9a07892706dca36a2687c7d69a8f7c793944 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 15:03:16 -0600 Subject: [PATCH 23/33] Unify Slice and DecompositionUtils zoltan_decompose --- .../seacas/applications/slice/SL_Decompose.C | 218 +----------------- .../ioss/src/Ioss_DecompositionUtils.C | 57 ++++- .../ioss/src/Ioss_DecompositionUtils.h | 6 + 3 files changed, 57 insertions(+), 224 deletions(-) diff --git a/packages/seacas/applications/slice/SL_Decompose.C b/packages/seacas/applications/slice/SL_Decompose.C index 16a43f4bd4..bc87a9742e 100644 --- a/packages/seacas/applications/slice/SL_Decompose.C +++ b/packages/seacas/applications/slice/SL_Decompose.C @@ -36,11 +36,6 @@ using idx_t = int; #endif -#if USE_ZOLTAN -#include // for Zoltan_Initialize -#include // for Zoltan -#endif - extern int debug_level; extern double seacas_timer(); extern void progress(const std::string &output); @@ -127,209 +122,6 @@ namespace { [](char a, char b) { return std::tolower(a) == std::tolower(b); }); } -#if USE_ZOLTAN - /*****************************************************************************/ - /***** Global data structure used by Zoltan callbacks. 
*****/ - /***** Could implement Zoltan callbacks without global data structure, *****/ - /***** but using the global data structure makes implementation quick. *****/ - struct - { - size_t ndot; /* Length of x, y, z, and part (== # of elements) */ - float *vwgt; /* vertex weights */ - double *x; /* x-coordinates */ - double *y; /* y-coordinates */ - double *z; /* z-coordinates */ - } Zoltan_Data; - - /*****************************************************************************/ - /***** ZOLTAN CALLBACK FUNCTIONS *****/ - int zoltan_num_dim(void * /*data*/, int *ierr) - { - /* Return dimensionality of coordinate data. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - *ierr = ZOLTAN_OK; - if (Zoltan_Data.z != nullptr) { - return 3; - } - if (Zoltan_Data.y != nullptr) { - return 2; - } - return 1; - } - - int zoltan_num_obj(void * /*data*/, int *ierr) - { - /* Return number of objects. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - *ierr = ZOLTAN_OK; - return Zoltan_Data.ndot; - } - - void zoltan_obj_list(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, ZOLTAN_ID_PTR gids, - ZOLTAN_ID_PTR /*lids*/, int wdim, float *wgts, int *ierr) - { - /* Return list of object IDs. - * Return only global IDs; don't need local IDs since running in serial. - * gids are array indices for coordinate and vwgts arrays. - * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - std::iota(gids, gids + Zoltan_Data.ndot, 0); - if (wdim != 0) { - for (size_t i = 0; i < Zoltan_Data.ndot; i++) { - wgts[i] = static_cast(Zoltan_Data.vwgt[i]); - } - } - - *ierr = ZOLTAN_OK; - } - - void zoltan_geom(void * /*data*/, int /*ngid_ent*/, int /*nlid_ent*/, int nobj, - const ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR /*lids*/, int ndim, double *geom, - int *ierr) - { - /* Return coordinates for objects. - * gids are array indices for coordinate arrays. 
- * Using global data structure Zoltan_Data, initialized in ZOLTAN_RCB_assign. - */ - - for (size_t i = 0; i < static_cast(nobj); i++) { - size_t j = gids[i]; - geom[i * ndim] = Zoltan_Data.x[j]; - if (ndim > 1) { - geom[i * ndim + 1] = Zoltan_Data.y[j]; - } - if (ndim > 2) { - geom[i * ndim + 2] = Zoltan_Data.z[j]; - } - } - - *ierr = ZOLTAN_OK; - } - - template - void decompose_zoltan(const Ioss::Region ®ion, int ranks, SystemInterface &interFace, - std::vector &elem_to_proc, const std::vector &weights, - IOSS_MAYBE_UNUSED INT dummy) - { - if (ranks == 1) { - return; - } - - size_t element_count = region.get_property("element_count").get_int(); - if (element_count != static_cast(static_cast(element_count))) { - fmt::print(stderr, "ERROR: Cannot have a mesh with more than 2.1 Billion elements in a " - "Zoltan decomposition.\n"); - exit(EXIT_FAILURE); - } - - auto [x, y, z] = Ioss::DecompUtils::get_element_centroid(region, dummy); - - // Copy mesh data and pointers into structure accessible from callback fns. 
- Zoltan_Data.ndot = element_count; - Zoltan_Data.vwgt = const_cast(Data(weights)); - - if (interFace.ignore_x_ && interFace.ignore_y_) { - Zoltan_Data.x = Data(z); - } - else if (interFace.ignore_x_ && interFace.ignore_z_) { - Zoltan_Data.x = Data(y); - } - else if (interFace.ignore_y_ && interFace.ignore_z_) { - Zoltan_Data.x = Data(x); - } - else if (interFace.ignore_x_) { - Zoltan_Data.x = Data(y); - Zoltan_Data.y = Data(z); - } - else if (interFace.ignore_y_) { - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(z); - } - else if (!interFace.ignore_z_) { - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(y); - } - else { - Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(y); - Zoltan_Data.z = Data(z); - } - - // Initialize Zoltan - int argc = 0; - char **argv = nullptr; - - float ver = 0.0; - Zoltan_Initialize(argc, argv, &ver); - fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), - interFace.decomposition_method()); - - Zoltan zz(Ioss::ParallelUtils::comm_world()); - - // Register Callback functions - // Using global Zoltan_Data; could register it here instead as data field. - zz.Set_Num_Obj_Fn(zoltan_num_obj, nullptr); - zz.Set_Obj_List_Fn(zoltan_obj_list, nullptr); - zz.Set_Num_Geom_Fn(zoltan_num_dim, nullptr); - zz.Set_Geom_Multi_Fn(zoltan_geom, nullptr); - - // Set parameters for Zoltan - zz.Set_Param("DEBUG_LEVEL", "0"); - std::string str = fmt::format("{}", ranks); - zz.Set_Param("NUM_GLOBAL_PARTS", str); - zz.Set_Param("OBJ_WEIGHT_DIM", "1"); - zz.Set_Param("LB_METHOD", interFace.decomposition_method()); - zz.Set_Param("NUM_LID_ENTRIES", "0"); - zz.Set_Param("REMAP", "0"); - zz.Set_Param("RETURN_LISTS", "PARTITION_ASSIGNMENTS"); - zz.Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); - - int num_global = sizeof(INT) / sizeof(ZOLTAN_ID_TYPE); - num_global = num_global < 1 ? 
1 : num_global; - - // Call partitioner - int changes = 0; - int num_local = 0; - int num_import = 1; - int num_export = 1; - ZOLTAN_ID_PTR import_global_ids = nullptr; - ZOLTAN_ID_PTR import_local_ids = nullptr; - ZOLTAN_ID_PTR export_global_ids = nullptr; - ZOLTAN_ID_PTR export_local_ids = nullptr; - int *import_procs = nullptr; - int *import_to_part = nullptr; - int *export_procs = nullptr; - int *export_to_part = nullptr; - int rc = zz.LB_Partition(changes, num_global, num_local, num_import, import_global_ids, - import_local_ids, import_procs, import_to_part, num_export, - export_global_ids, export_local_ids, export_procs, export_to_part); - - if (rc != ZOLTAN_OK) { - fmt::print(stderr, "ERROR: Problem during call to Zoltan LB_Partition.\n"); - goto End; - } - - // Sanity check - if (element_count != static_cast(num_export)) { - fmt::print(stderr, "Sanity check failed; ndot {} != num_export {}.\n", element_count, - static_cast(num_export)); - goto End; - } - - elem_to_proc.resize(element_count); - for (size_t i = 0; i < element_count; i++) { - elem_to_proc[i] = export_to_part[i]; - } - - End: - /* Clean up */ - Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); - Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); - } -#endif - #if USE_METIS int get_common_node_count(const Ioss::Region ®ion) { @@ -552,13 +344,9 @@ std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface else if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || interFace.decomposition_method() == "hsfc") { -#if USE_ZOLTAN - decompose_zoltan(region, interFace.processor_count(), interFace, elem_to_proc, weights, dummy); -#else - fmt::print(stderr, "ERROR: Zoltan library not enabled in this version of slice.\n" - " The 'rcb', 'rib', and 'hsfc' methods are not available.\n\n"); - std::exit(1); -#endif + Ioss::DecompUtils::decompose_zoltan(region, 
interFace.processor_count(), interFace.decomposition_method(), + elem_to_proc, weights, + interFace.ignore_x_, interFace.ignore_y_, interFace.ignore_z_, dummy); } else if (interFace.decomposition_method() == "rb" || interFace.decomposition_method() == "kway") { diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 0a4dad45ce..f4bbf84e46 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -186,12 +186,29 @@ std::map> string_chains(const Ioss::chain_t &element_ } fmt::print("\n"); } +} + +namespace Ioss { + template void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED int dummy); + template void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED int64_t dummy); template - void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, - std::vector &elem_to_proc, const std::vector &weights, - IOSS_MAYBE_UNUSED INT dummy) + void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED INT dummy) { +#if defined(NO_ZOLTAN_SUPPORT) + fmt::print(stderr, "ERROR: Zoltan library not enabled in this version of slice.\n" + " The 'rcb', 'rib', and 'hsfc' methods are not available.\n\n"); + std::exit(1); +#else if (ranks == 1) { return; } @@ -209,9 +226,32 @@ std::map> string_chains(const Ioss::chain_t &element_ Zoltan_Data.ndot = element_count; Zoltan_Data.vwgt = const_cast(Data(weights)); - 
Zoltan_Data.x = Data(x); - Zoltan_Data.y = Data(y); - Zoltan_Data.z = Data(z); + if (ignore_x && ignore_y) { + Zoltan_Data.x = Data(z); + } + else if (ignore_x && ignore_z) { + Zoltan_Data.x = Data(y); + } + else if (ignore_y && ignore_z) { + Zoltan_Data.x = Data(x); + } + else if (ignore_x) { + Zoltan_Data.x = Data(y); + Zoltan_Data.y = Data(z); + } + else if (ignore_y) { + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(z); + } + else if (ignore_z) { + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(y); + } + else { + Zoltan_Data.x = Data(x); + Zoltan_Data.y = Data(y); + Zoltan_Data.z = Data(z); + } // Initialize Zoltan int argc = 0; @@ -283,10 +323,9 @@ std::map> string_chains(const Ioss::chain_t &element_ /* Clean up */ Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); +#endif } -} -namespace Ioss { template int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, INT dummy) { @@ -300,7 +339,7 @@ namespace Ioss { region.get_database()->progress("generate_element_weights"); double start = Ioss::Utils::timer(); - decompose_zoltan(region, num_ranks, method, element_to_proc, weights, dummy); + decompose_zoltan(region, num_ranks, method, element_to_proc, weights, false, false, false, dummy); double end = Ioss::Utils::timer(); fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); region.get_database()->progress("exit decompose_elements"); diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 9386bba275..4ce2b67ede 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -35,6 +35,12 @@ namespace Ioss { const std::string &surface_list, std::vector 
&element_to_proc, INT dummy); + template + static void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED INT dummy); + template static std::tuple, std::vector, std::vector> get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy); From 016ddf7cc188d799cee6bfff56447f956a62d3bd Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 15:57:05 -0600 Subject: [PATCH 24/33] IOSS: Fix msys2 build --- packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index f4bbf84e46..ad2a2a83ee 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -189,11 +189,11 @@ std::map> string_chains(const Ioss::chain_t &element_ } namespace Ioss { - template void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + template IOSS_EXPORT void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, std::vector &elem_to_proc, const std::vector &weights, bool ignore_x, bool ignore_y, bool ignore_z, IOSS_MAYBE_UNUSED int dummy); - template void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + template IOSS_EXPORT void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, std::vector &elem_to_proc, const std::vector &weights, bool ignore_x, bool ignore_y, bool ignore_z, IOSS_MAYBE_UNUSED int64_t dummy); From e4f782e356e13e2e5beba563a4a9cb69c609a5e6 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 15:58:56 -0600 Subject: [PATCH 25/33] clang-format run --- 
.../seacas/applications/slice/SL_Decompose.C | 10 +- packages/seacas/applications/slice/Slice.C | 11 +- .../libraries/ioss/src/Ioss_ChainGenerator.C | 13 +- .../libraries/ioss/src/Ioss_Decomposition.C | 9 +- .../ioss/src/Ioss_DecompositionUtils.C | 394 +++++++++--------- .../ioss/src/Ioss_DecompositionUtils.h | 6 +- .../libraries/ioss/src/Ioss_FaceGenerator.C | 13 +- .../libraries/ioss/src/Ioss_FaceGenerator.h | 8 +- .../seacas/libraries/ioss/src/Ioss_Property.h | 3 +- .../ioss/src/exodus/Ioex_DatabaseIO.C | 4 +- .../ioss/src/exodus/Ioex_DecompositionData.C | 61 +-- 11 files changed, 275 insertions(+), 257 deletions(-) diff --git a/packages/seacas/applications/slice/SL_Decompose.C b/packages/seacas/applications/slice/SL_Decompose.C index bc87a9742e..a93ef45866 100644 --- a/packages/seacas/applications/slice/SL_Decompose.C +++ b/packages/seacas/applications/slice/SL_Decompose.C @@ -273,9 +273,9 @@ namespace { template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, const std::vector &weights, - IOSS_MAYBE_UNUSED int dummy); + IOSS_MAYBE_UNUSED int dummy); template std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface &interFace, - const std::vector &weights, + const std::vector &weights, IOSS_MAYBE_UNUSED int64_t dummy); template @@ -344,9 +344,9 @@ std::vector decompose_elements(const Ioss::Region ®ion, SystemInterface else if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || interFace.decomposition_method() == "hsfc") { - Ioss::DecompUtils::decompose_zoltan(region, interFace.processor_count(), interFace.decomposition_method(), - elem_to_proc, weights, - interFace.ignore_x_, interFace.ignore_y_, interFace.ignore_z_, dummy); + Ioss::DecompUtils::decompose_zoltan( + region, interFace.processor_count(), interFace.decomposition_method(), elem_to_proc, + weights, interFace.ignore_x_, interFace.ignore_y_, interFace.ignore_z_, dummy); } else if (interFace.decomposition_method() 
== "rb" || interFace.decomposition_method() == "kway") { diff --git a/packages/seacas/applications/slice/Slice.C b/packages/seacas/applications/slice/Slice.C index caf5b51341..a6f8b5c924 100644 --- a/packages/seacas/applications/slice/Slice.C +++ b/packages/seacas/applications/slice/Slice.C @@ -1375,7 +1375,7 @@ namespace { Ioss::PropertyManager properties = set_properties(interFace); Ioss::chain_t element_chains; - std::vector weights; + std::vector weights; if (interFace.lineDecomp_) { element_chains = Ioss::generate_element_chains(region, interFace.lineSurfaceList_, debug_level, dummy); @@ -1383,8 +1383,8 @@ namespace { if (interFace.decomposition_method() == "rcb" || interFace.decomposition_method() == "rib" || interFace.decomposition_method() == "hsfc") { - weights = - Ioss::DecompUtils::line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + weights = Ioss::DecompUtils::line_decomp_weights( + element_chains, region.get_property("element_count").get_int()); progress("generate_element_weights"); } } @@ -1401,12 +1401,13 @@ namespace { if (interFace.lineDecomp_) { // Make sure all elements on a chain are on the same processor rank... 
- Ioss::DecompUtils::line_decomp_modify(element_chains, elem_to_proc, interFace.processor_count()); + Ioss::DecompUtils::line_decomp_modify(element_chains, elem_to_proc, + interFace.processor_count()); } if (debug_level & 32) { Ioss::DecompUtils::output_decomposition_statistics(elem_to_proc, interFace.processor_count(), - elem_to_proc.size()); + elem_to_proc.size()); } if (!create_split_files) { diff --git a/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C index 7e173bf749..92d762eae9 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_ChainGenerator.C @@ -84,10 +84,9 @@ namespace { template void get_line_front(Ioss::SideSet *fs, const Ioss::ElementBlock *block, - Ioss::chain_t &element_chains, - front_t &front) + Ioss::chain_t &element_chains, front_t &front) { - const auto adj_block_name = block->name(); + const auto adj_block_name = block->name(); Ioss::NameList blocks; fs->block_membership(blocks); for (const auto &fs_block : blocks) { @@ -215,10 +214,10 @@ namespace Ioss { auto *eb = region.get_element_block(blk_name); assert(eb != nullptr); if (eb->topology()->shape() != Ioss::ElementShape::HEX) { - fmt::print("Skipping Element Block {}; it does not contain HEX elements.\n", blk_name); + fmt::print("Skipping Element Block {}; it does not contain HEX elements.\n", blk_name); } else { - adjacent_blocks.push_back(eb); + adjacent_blocks.push_back(eb); } } @@ -228,8 +227,8 @@ namespace Ioss { for (const auto *block : adjacent_blocks) { // Get the offset into the element_chains vector... 
- auto offset = block->get_offset() + 1; - auto count = block->entity_count(); + auto offset = block->get_offset() + 1; + auto count = block->entity_count(); auto front = get_line_front(region, block, element_chains, surface_list); if (front.empty()) { diff --git a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C index 6f2980fd1c..48df8029e0 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Decomposition.C @@ -303,9 +303,9 @@ namespace Ioss { } if (props.exists("LINE_DECOMPOSITION")) { - // The value of the property should be a comma-separated list of surface/sideset names from which the lines will grow, - // or the value "ALL" for all surfaces in the model. - m_lineDecomp = true; + // The value of the property should be a comma-separated list of surface/sideset names from + // which the lines will grow, or the value "ALL" for all surfaces in the model. + m_lineDecomp = true; m_decompExtra = props.get("LINE_DECOMPOSITION").get_string(); } } @@ -461,7 +461,7 @@ namespace Ioss { } if (m_method == "SPECIFIED") { // Currently used for line decomposition with another decomposition type. - // The line-modified decomposition is done prior to this and builds the + // The line-modified decomposition is done prior to this and builds the // `m_elementToProc` which is then used here to decompose the elements... 
guided_decompose(); } @@ -503,7 +503,6 @@ namespace Ioss { show_progress("\tIoss::decompose model finished"); } - template IOSS_EXPORT void Decomposition::calculate_element_centroids( const std::vector &x, const std::vector &y, const std::vector &z); template IOSS_EXPORT void Decomposition::calculate_element_centroids( diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index ad2a2a83ee..0ea5a2af78 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -8,14 +8,14 @@ #include #include -#include #include +#include -#include "Ioss_CodeTypes.h" #include "Ioss_ChainGenerator.h" +#include "Ioss_CodeTypes.h" #include "Ioss_DecompositionUtils.h" -#include "Ioss_NodeBlock.h" #include "Ioss_ElementBlock.h" +#include "Ioss_NodeBlock.h" #include "Ioss_ParallelUtils.h" #include "Ioss_Region.h" #include "Ioss_SmartAssert.h" @@ -115,19 +115,19 @@ namespace { } #endif -template -std::map> string_chains(const Ioss::chain_t &element_chains) -{ - std::map> chains; + template + std::map> string_chains(const Ioss::chain_t &element_chains) + { + std::map> chains; - for (size_t i = 0; i < element_chains.size(); i++) { - auto &chain_entry = element_chains[i]; - if (chain_entry.link >= 0) { - chains[chain_entry.element].push_back(i + 1); + for (size_t i = 0; i < element_chains.size(); i++) { + auto &chain_entry = element_chains[i]; + if (chain_entry.link >= 0) { + chains[chain_entry.element].push_back(i + 1); + } } + return chains; } - return chains; -} void output_histogram(const std::vector &proc_work, size_t avg_work, size_t median) { @@ -186,23 +186,25 @@ std::map> string_chains(const Ioss::chain_t &element_ } fmt::print("\n"); } -} +} // namespace namespace Ioss { - template IOSS_EXPORT void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, - std::vector &elem_to_proc, 
const std::vector &weights, - bool ignore_x, bool ignore_y, bool ignore_z, - IOSS_MAYBE_UNUSED int dummy); - template IOSS_EXPORT void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, - std::vector &elem_to_proc, const std::vector &weights, - bool ignore_x, bool ignore_y, bool ignore_z, - IOSS_MAYBE_UNUSED int64_t dummy); + template IOSS_EXPORT void + DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED int dummy); + template IOSS_EXPORT void + DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED int64_t dummy); template - void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, - std::vector &elem_to_proc, const std::vector &weights, - bool ignore_x, bool ignore_y, bool ignore_z, - IOSS_MAYBE_UNUSED INT dummy) + void DecompUtils::decompose_zoltan(const Ioss::Region ®ion, int ranks, + const std::string &method, std::vector &elem_to_proc, + const std::vector &weights, bool ignore_x, + bool ignore_y, bool ignore_z, IOSS_MAYBE_UNUSED INT dummy) { #if defined(NO_ZOLTAN_SUPPORT) fmt::print(stderr, "ERROR: Zoltan library not enabled in this version of slice.\n" @@ -259,8 +261,7 @@ namespace Ioss { float ver = 0.0; Zoltan_Initialize(argc, argv, &ver); - fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), - method); + fmt::print("Using Zoltan version {:.2}, method {}\n", static_cast(ver), method); Zoltan zz(Ioss::ParallelUtils::comm_self()); @@ -327,190 +328,203 @@ namespace Ioss { } template - int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, INT 
dummy) -{ - - Ioss::chain_t element_chains = - Ioss::generate_element_chains(region, surface_list, 0, dummy); - region.get_database()->progress("Ioss::generate_element_chains"); - - std::vector weights = - line_decomp_weights(element_chains, region.get_property("element_count").get_int()); - region.get_database()->progress("generate_element_weights"); - - double start = Ioss::Utils::timer(); - decompose_zoltan(region, num_ranks, method, element_to_proc, weights, false, false, false, dummy); - double end = Ioss::Utils::timer(); - fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); - region.get_database()->progress("exit decompose_elements"); - - // Make sure all elements on a chain are on the same processor rank... - line_decomp_modify(element_chains, element_to_proc, num_ranks); - - return 1; -} - - template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int dummy); - template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, const std::string &surface_list, std::vector &element_to_proc, int64_t dummy); - -template -std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count) -{ - int debug_level = 0; - auto chains = string_chains(element_chains); - - if ((debug_level & 16) != 0) { - for (const auto &[chain_root, chain_elements] : chains) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - } + int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc, INT dummy) + { - std::vector weights(element_count, 1); - // Now, for each chain... - for (const auto &[chain_root, chain_elements] : chains) { - // * Set the weights of all elements in the chain... - // * non-root = 0, root = length of chain. 
- for (const auto &element : chain_elements) { - weights[element - 1] = 0; - } - weights[chain_root - 1] = static_cast(chain_elements.size()); + Ioss::chain_t element_chains = + Ioss::generate_element_chains(region, surface_list, 0, dummy); + region.get_database()->progress("Ioss::generate_element_chains"); + + std::vector weights = + line_decomp_weights(element_chains, region.get_property("element_count").get_int()); + region.get_database()->progress("generate_element_weights"); + + double start = Ioss::Utils::timer(); + decompose_zoltan(region, num_ranks, method, element_to_proc, weights, false, false, false, + dummy); + double end = Ioss::Utils::timer(); + fmt::print(stderr, "Decompose elements = {:.5}\n", end - start); + region.get_database()->progress("exit decompose_elements"); + + // Make sure all elements on a chain are on the same processor rank... + line_decomp_modify(element_chains, element_to_proc, num_ranks); + + return 1; } - return weights; -} -template IOSS_EXPORT std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); -template IOSS_EXPORT std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, - size_t element_count); - - -template -void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, std::vector &elem_to_proc, - int proc_count) -{ - int debug_level = 0; - // Get a map of all chains and the elements in the chains. Map key will be root. - auto chains = string_chains(element_chains); - - // Delta: elements added/removed from each processor... - std::vector delta(proc_count); - - // Now, for each chain... 
- for (const auto &[chain_root, chain_elements] : chains) { - if ((debug_level & 16) != 0) { - fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); - } - std::vector chain_proc_count(proc_count); + template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, + const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc, + int dummy); + template IOSS_EXPORT int DecompUtils::line_decompose(Region ®ion, size_t num_ranks, + const std::string &method, + const std::string &surface_list, + std::vector &element_to_proc, + int64_t dummy); - // * get processors used by elements in the chain... - for (const auto &element : chain_elements) { - auto proc = elem_to_proc[element - 1]; - chain_proc_count[proc]++; - } + template + std::vector DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count) + { + int debug_level = 0; + auto chains = string_chains(element_chains); - // * Now, subtract the `delta` from each count - for (int i = 0; i < proc_count; i++) { - chain_proc_count[i] -= delta[i]; + if ((debug_level & 16) != 0) { + for (const auto &[chain_root, chain_elements] : chains) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } } - // * Assign all elements in the chain to processor at chain root - // * Update the deltas for all processors that gain/lose elements... - auto root_proc = elem_to_proc[chain_root - 1]; - for (const auto &element : chain_elements) { - if (elem_to_proc[element - 1] != root_proc) { - auto old_proc = elem_to_proc[element - 1]; - elem_to_proc[element - 1] = root_proc; - delta[root_proc]++; - delta[old_proc]--; + std::vector weights(element_count, 1); + // Now, for each chain... + for (const auto &[chain_root, chain_elements] : chains) { + // * Set the weights of all elements in the chain... + // * non-root = 0, root = length of chain. 
+ for (const auto &element : chain_elements) { + weights[element - 1] = 0; } + weights[chain_root - 1] = static_cast(chain_elements.size()); } + return weights; } + template IOSS_EXPORT std::vector + DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, size_t element_count); + template IOSS_EXPORT std::vector + DecompUtils::line_decomp_weights(const Ioss::chain_t &element_chains, + size_t element_count); - std::vector proc_element_count(proc_count); - for (auto proc : elem_to_proc) { - proc_element_count[proc]++; - } - if ((debug_level & 32) != 0) { - fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); - fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); - } -} + template + void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count) + { + int debug_level = 0; + // Get a map of all chains and the elements in the chains. Map key will be root. + auto chains = string_chains(element_chains); + // Delta: elements added/removed from each processor... + std::vector delta(proc_count); -template IOSS_EXPORT void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); -template IOSS_EXPORT void DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, - std::vector &elem_to_proc, int proc_count); + // Now, for each chain... + for (const auto &[chain_root, chain_elements] : chains) { + if ((debug_level & 16) != 0) { + fmt::print("Chain Root: {} contains: {}\n", chain_root, fmt::join(chain_elements, ", ")); + } + + std::vector chain_proc_count(proc_count); + + // * get processors used by elements in the chain... 
+ for (const auto &element : chain_elements) { + auto proc = elem_to_proc[element - 1]; + chain_proc_count[proc]++; + } + + // * Now, subtract the `delta` from each count + for (int i = 0; i < proc_count; i++) { + chain_proc_count[i] -= delta[i]; + } -void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, int proc_count, - size_t number_elements) -{ - // Output histogram of elements / rank... - std::vector elem_per_rank(proc_count); - for (int proc : elem_to_proc) { - elem_per_rank[proc]++; + // * Assign all elements in the chain to processor at chain root + // * Update the deltas for all processors that gain/lose elements... + auto root_proc = elem_to_proc[chain_root - 1]; + for (const auto &element : chain_elements) { + if (elem_to_proc[element - 1] != root_proc) { + auto old_proc = elem_to_proc[element - 1]; + elem_to_proc[element - 1] = root_proc; + delta[root_proc]++; + delta[old_proc]--; + } + } + } + + std::vector proc_element_count(proc_count); + for (auto proc : elem_to_proc) { + proc_element_count[proc]++; + } + if ((debug_level & 32) != 0) { + fmt::print("\nElements/Processor: {}\n", fmt::join(proc_element_count, ", ")); + fmt::print("Delta/Processor: {}\n", fmt::join(delta, ", ")); + } } - size_t proc_width = Ioss::Utils::number_width(proc_count, false); - size_t work_width = Ioss::Utils::number_width(number_elements, true); + template IOSS_EXPORT void + DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); + template IOSS_EXPORT void + DecompUtils::line_decomp_modify(const Ioss::chain_t &element_chains, + std::vector &elem_to_proc, int proc_count); - auto min_work = *std::min_element(elem_per_rank.begin(), elem_per_rank.end()); - auto max_work = *std::max_element(elem_per_rank.begin(), elem_per_rank.end()); - size_t median = 0; + void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, + int proc_count, size_t number_elements) { - auto 
pw_copy(elem_per_rank); - std::nth_element(pw_copy.begin(), pw_copy.begin() + pw_copy.size() / 2, pw_copy.end()); - median = pw_copy[pw_copy.size() / 2]; - fmt::print("\nElements per processor:\n\tMinimum = {}, Maximum = {}, Median = {}, Ratio = " - "{:.3}\n\n", - fmt::group_digits(min_work), fmt::group_digits(max_work), fmt::group_digits(median), - (double)(max_work) / min_work); - } - if (min_work == max_work) { - fmt::print("\nWork on all processors is {}\n\n", fmt::group_digits(min_work)); - } - else { - int max_star = 40; - int min_star = max_star * ((double)min_work / (double)(max_work)); - min_star = std::max(1, min_star); - int delta = max_star - min_star; - - double avg_work = (double)number_elements / (double)proc_count; - for (size_t i = 0; i < elem_per_rank.size(); i++) { - int star_cnt = - (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; - std::string stars(star_cnt, '*'); - std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; - if (elem_per_rank[i] == max_work) { - fmt::print( + // Output histogram of elements / rank... 
+ std::vector elem_per_rank(proc_count); + for (int proc : elem_to_proc) { + elem_per_rank[proc]++; + } + + size_t proc_width = Ioss::Utils::number_width(proc_count, false); + size_t work_width = Ioss::Utils::number_width(number_elements, true); + + auto min_work = *std::min_element(elem_per_rank.begin(), elem_per_rank.end()); + auto max_work = *std::max_element(elem_per_rank.begin(), elem_per_rank.end()); + size_t median = 0; + { + auto pw_copy(elem_per_rank); + std::nth_element(pw_copy.begin(), pw_copy.begin() + pw_copy.size() / 2, pw_copy.end()); + median = pw_copy[pw_copy.size() / 2]; + fmt::print("\nElements per processor:\n\tMinimum = {}, Maximum = {}, Median = {}, Ratio = " + "{:.3}\n\n", + fmt::group_digits(min_work), fmt::group_digits(max_work), + fmt::group_digits(median), (double)(max_work) / min_work); + } + if (min_work == max_work) { + fmt::print("\nWork on all processors is {}\n\n", fmt::group_digits(min_work)); + } + else { + int max_star = 40; + int min_star = max_star * ((double)min_work / (double)(max_work)); + min_star = std::max(1, min_star); + int delta = max_star - min_star; + + double avg_work = (double)number_elements / (double)proc_count; + for (size_t i = 0; i < elem_per_rank.size(); i++) { + int star_cnt = + (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; + std::string stars(star_cnt, '*'); + std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; + if (elem_per_rank[i] == max_work) { + fmt::print( #if !defined __NVCC__ - fg(fmt::color::red), + fg(fmt::color::red), #endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - (double)elem_per_rank[i] / avg_work, stars); - } - else if (elem_per_rank[i] == min_work) { - fmt::print( + format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + (double)elem_per_rank[i] / avg_work, stars); + } + else if (elem_per_rank[i] == min_work) { + fmt::print( #if !defined __NVCC__ - fg(fmt::color::green), + 
fg(fmt::color::green), #endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - elem_per_rank[i] / avg_work, stars); - } - else { - fmt::print(format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - elem_per_rank[i] / avg_work, stars); + format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + elem_per_rank[i] / avg_work, stars); + } + else { + fmt::print(format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + elem_per_rank[i] / avg_work, stars); + } } - } - // Output Histogram... - output_histogram(elem_per_rank, (size_t)avg_work, median); + // Output Histogram... + output_histogram(elem_per_rank, (size_t)avg_work, median); + } } -} template std::tuple, std::vector, std::vector> - DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED INT dummy) { size_t element_count = region.get_property("element_count").get_int(); @@ -548,12 +562,10 @@ void DecompUtils::output_decomposition_statistics(const std::vector &elem_t return {x, y, z}; } -template IOSS_EXPORT - std::tuple, std::vector, std::vector> - DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int dummy); + template IOSS_EXPORT std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int dummy); -template IOSS_EXPORT - std::tuple, std::vector, std::vector> - DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int64_t dummy); + template IOSS_EXPORT std::tuple, std::vector, std::vector> + DecompUtils::get_element_centroid(const Ioss::Region ®ion, IOSS_MAYBE_UNUSED int64_t dummy); -} +} // namespace Ioss diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index 4ce2b67ede..ccbe838bb9 100644 --- 
a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -37,9 +37,9 @@ namespace Ioss { template static void decompose_zoltan(const Ioss::Region ®ion, int ranks, const std::string &method, - std::vector &elem_to_proc, const std::vector &weights, - bool ignore_x, bool ignore_y, bool ignore_z, - IOSS_MAYBE_UNUSED INT dummy); + std::vector &elem_to_proc, const std::vector &weights, + bool ignore_x, bool ignore_y, bool ignore_z, + IOSS_MAYBE_UNUSED INT dummy); template static std::tuple, std::vector, std::vector> diff --git a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C index c9ddaa6b02..b167d1aae8 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.C @@ -330,7 +330,7 @@ namespace Ioss { FaceUnorderedSet &FaceGenerator::faces(const Ioss::ElementBlock *block) { auto name = block->name(); - return faces_[name]; + return faces_[name]; } template IOSS_EXPORT void FaceGenerator::generate_faces(int, bool, bool); @@ -356,11 +356,14 @@ namespace Ioss { } } - template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, int, bool); - template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, int64_t, bool); + template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, + int, bool); + template IOSS_EXPORT void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &, + int64_t, bool); - template void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &ebs, - INT /*dummy*/, bool local_ids) + template + void FaceGenerator::generate_block_faces(const Ioss::ElementBlockContainer &ebs, INT /*dummy*/, + bool local_ids) { // Convert ids into hashed-ids Ioss::NodeBlock *nb = region_.get_node_blocks()[0]; diff --git 
a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h index c8d4651751..2881b08f43 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h +++ b/packages/seacas/libraries/ioss/src/Ioss_FaceGenerator.h @@ -14,8 +14,8 @@ #include #include -#include "ioss_export.h" #include "Ioss_Region.h" +#include "ioss_export.h" #define FG_USE_ROBIN #if defined FG_USE_STD @@ -123,9 +123,9 @@ namespace Ioss { template void generate_faces(INT /*dummy*/, bool block_by_block = false, bool local_ids = false); - template - void generate_block_faces(const ElementBlockContainer &ebs, - INT /*dummy*/, bool local_ids = false); + template + void generate_block_faces(const ElementBlockContainer &ebs, INT /*dummy*/, + bool local_ids = false); FaceUnorderedSet &faces(const std::string &name = "ALL") { return faces_[name]; } FaceUnorderedSet &faces(const ElementBlock *block); diff --git a/packages/seacas/libraries/ioss/src/Ioss_Property.h b/packages/seacas/libraries/ioss/src/Ioss_Property.h index 2ef5656778..fdb065e695 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Property.h +++ b/packages/seacas/libraries/ioss/src/Ioss_Property.h @@ -97,6 +97,7 @@ namespace Ioss { IOSS_NODISCARD bool operator!=(const Ioss::Property &rhs) const; IOSS_NODISCARD bool operator==(const Ioss::Property &rhs) const; +#if 0 friend void swap(Ioss::Property &first, Ioss::Property &second) noexcept { using std::swap; @@ -105,7 +106,7 @@ namespace Ioss { swap(first.origin_, second.origin_); swap(first.data_, second.data_); } - +#endif private: std::string name_{}; BasicType type_{INVALID}; diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C index 9a38458021..5e0540cc7e 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C @@ -282,8 +282,8 @@ namespace Ioex { double t_end = 
Ioss::Utils::timer(); double duration = util().global_minmax(t_end - t_begin, Ioss::ParallelUtils::DO_MAX); if (myProcessor == 0) { - fmt::print(Ioss::DebugOut(), "Input File Open Time = {} ({})\n", duration, - decoded_filename()); + fmt::print(Ioss::DebugOut(), "Input File Open Time = {} ({})\n", duration, + decoded_filename()); } } diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 10ac7b9044..5e06be4b03 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -5,14 +5,14 @@ // See packages/seacas/LICENSE for details #include "Ioss_CodeTypes.h" -#include "exodus/Ioex_DecompositionData.h" #include "Ioss_DecompositionUtils.h" +#include "exodus/Ioex_DecompositionData.h" #if defined PARALLEL_AWARE_EXODUS #include "Ioss_ElementTopology.h" #include "Ioss_Field.h" #include "Ioss_IOFactory.h" -#include "Ioss_Map.h" +#include "Ioss_Map.h" #include "Ioss_PropertyManager.h" #include "Ioss_Region.h" #include "Ioss_SmartAssert.h" @@ -84,10 +84,10 @@ namespace { if (wdim != 0) { if (zdata->weights().empty()) { - std::fill(wgts, wgts + element_count, 1.0); + std::fill(wgts, wgts + element_count, 1.0); } else { - std::copy(zdata->weights().begin(), zdata->weights().end(), &wgts[0]); + std::copy(zdata->weights().begin(), zdata->weights().end(), &wgts[0]); } } @@ -135,7 +135,8 @@ namespace Ioex { m_processorCount = pu.parallel_size(); } - template void DecompositionData::decompose_model(int filePtr, const std::string &filename) + template + void DecompositionData::decompose_model(int filePtr, const std::string &filename) { m_decomposition.show_progress(__func__); // Initial decomposition is linear where processor #p contains @@ -201,16 +202,16 @@ namespace Ioex { for (int i = 0; i < map_count; i++) { if (std::string(names[i]) == map_name) { 
m_decomposition.m_elementToProc.resize(decomp_elem_count()); - if (sizeof(INT) == 4) { - ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, - decomp_elem_count(), Data(m_decomposition.m_elementToProc)); - } - else { - std::vector tmp_map(decomp_elem_count()); - ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, - decomp_elem_count(), Data(tmp_map)); - std::copy(tmp_map.begin(), tmp_map.end(), m_decomposition.m_elementToProc.begin()); - } + if (sizeof(INT) == 4) { + ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, + decomp_elem_count(), Data(m_decomposition.m_elementToProc)); + } + else { + std::vector tmp_map(decomp_elem_count()); + ex_get_partial_num_map(filePtr, EX_ELEM_MAP, i + 1, decomp_elem_offset() + 1, + decomp_elem_count(), Data(tmp_map)); + std::copy(tmp_map.begin(), tmp_map.end(), m_decomposition.m_elementToProc.begin()); + } map_read = true; break; } @@ -277,18 +278,20 @@ namespace Ioex { if (m_decomposition.m_lineDecomp) { // For first iteration of this, we do the line-decomp modified decomposition on a single rank // and then communicate the m_elementToProc vector to each of the ranks. This is then used - // do do the parallel distributions/decomposition of the elements assuming a "guided" decomposition. + // to do the parallel distributions/decomposition of the elements assuming a "guided" + // decomposition.
std::vector element_to_proc_global{}; m_decomposition.show_progress("***LINE_DECOMPOSE BEGIN***"); if (m_processor == 0) { - Ioss::PropertyManager properties; - Ioss::DatabaseIO *dbi = Ioss::IOFactory::create("exodus", filename, Ioss::READ_RESTART, - Ioss::ParallelUtils::comm_self(), properties); - Ioss::Region region(dbi, "line_decomp_region"); - - int status = Ioss::DecompUtils::line_decompose(region, m_processorCount, m_decomposition.m_method, - m_decomposition.m_decompExtra, element_to_proc_global, INT(0)); + Ioss::PropertyManager properties; + Ioss::DatabaseIO *dbi = Ioss::IOFactory::create( + "exodus", filename, Ioss::READ_RESTART, Ioss::ParallelUtils::comm_self(), properties); + Ioss::Region region(dbi, "line_decomp_region"); + + int status = Ioss::DecompUtils::line_decompose( + region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, + element_to_proc_global, INT(0)); } // Now broadcast the parts of the `element_to_proc_global` // vector to the owning ranks in the initial linear @@ -302,21 +305,21 @@ namespace Ioex { int sum = 0; int rem = globalElementCount % m_processorCount; for (int i = 0; i < m_processorCount; i++) { - sendcounts[i] = globalElementCount/m_processorCount; + sendcounts[i] = globalElementCount / m_processorCount; if (rem > 0) { - sendcounts[i]++; - rem--; + sendcounts[i]++; + rem--; } displs[i] = sum; sum += sendcounts[i]; } - MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, - Data(m_decomposition.m_elementToProc), decomp_elem_count(), MPI_INT, 0, m_decomposition.m_comm); + MPI_Scatterv(Data(element_to_proc_global), Data(sendcounts), Data(displs), MPI_INT, + Data(m_decomposition.m_elementToProc), decomp_elem_count(), MPI_INT, 0, + m_decomposition.m_comm); m_decomposition.m_method = "SPECIFIED"; m_decomposition.show_progress("***LINE_DECOMPOSE END***"); } - #if !defined(NO_ZOLTAN_SUPPORT) float version = 0.0; Zoltan_Initialize(0, nullptr, &version); From 
283ebafc387daf9bab1d143fcf5c0df6a49a7a25 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 16:34:54 -0600 Subject: [PATCH 26/33] SLICE: Version should be updated for latest changes --- packages/seacas/applications/slice/SL_Version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/seacas/applications/slice/SL_Version.h b/packages/seacas/applications/slice/SL_Version.h index 72fb5eaf86..1560a65a3c 100644 --- a/packages/seacas/applications/slice/SL_Version.h +++ b/packages/seacas/applications/slice/SL_Version.h @@ -9,6 +9,6 @@ static const std::array qainfo{ "slice", - "2024/04/03", - "2.2.01", + "2024/06/10", + "2.3.00", }; From a0a76cfd3055ebf18ec02d06b318933def4906b8 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 10 Jun 2024 17:08:55 -0600 Subject: [PATCH 27/33] IOSS: Reduce storage potentially; fix zoltan free call --- .../libraries/ioss/src/Ioss_DecompositionUtils.C | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 0ea5a2af78..5e8d5c22fa 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -229,23 +229,32 @@ namespace Ioss { Zoltan_Data.vwgt = const_cast(Data(weights)); if (ignore_x && ignore_y) { + x.clear(); + y.clear(); Zoltan_Data.x = Data(z); } else if (ignore_x && ignore_z) { + x.clear(); + z.clear(); Zoltan_Data.x = Data(y); } else if (ignore_y && ignore_z) { + y.clear(); + z.clear(); Zoltan_Data.x = Data(x); } else if (ignore_x) { + x.clear(); Zoltan_Data.x = Data(y); Zoltan_Data.y = Data(z); } else if (ignore_y) { + y.clear(); Zoltan_Data.x = Data(x); Zoltan_Data.y = Data(z); } else if (ignore_z) { + z.clear(); Zoltan_Data.x = Data(x); Zoltan_Data.y = Data(y); } @@ -322,7 +331,7 @@ namespace Ioss { End: /* Clean up */ -
Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); + Zoltan::LB_Free_Part(&import_global_ids, &import_local_ids, &import_procs, &import_to_part); Zoltan::LB_Free_Part(&export_global_ids, &export_local_ids, &export_procs, &export_to_part); #endif } From 7cc9916af8ce6720eb4575c3f427ae41a2a9ce01 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 08:07:38 -0600 Subject: [PATCH 28/33] SLICE: Fix order of file close and mpi_finalize --- packages/seacas/applications/slice/Slice.C | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/seacas/applications/slice/Slice.C b/packages/seacas/applications/slice/Slice.C index a6f8b5c924..1b9eb4c715 100644 --- a/packages/seacas/applications/slice/Slice.C +++ b/packages/seacas/applications/slice/Slice.C @@ -388,12 +388,11 @@ int main(int argc, char *argv[]) dbi->set_surface_split_type(Ioss::SPLIT_BY_DONT_SPLIT); dbi->set_field_separator(0); - // NOTE: 'region' owns 'db' pointer at this time... - Ioss::Region region(dbi, "region_1"); - - region.output_summary(std::cerr, true); - try { + // NOTE: 'region' owns 'db' pointer at this time... 
+ Ioss::Region region(dbi, "region_1"); + region.output_summary(std::cerr, true); + if (dbi->int_byte_size_api() == 4) { progress("4-byte slice"); slice(region, nem_file, interFace, 1); From 76cf01b4f493329a7d649c339745ba54ef6ad2c0 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 08:08:54 -0600 Subject: [PATCH 29/33] IOSS: Enable decomposition statistics for line decomp --- packages/seacas/applications/slice/Slice.C | 3 +-- packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C | 5 +++-- packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h | 2 +- .../libraries/ioss/src/exodus/Ioex_DecompositionData.C | 4 ++++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/seacas/applications/slice/Slice.C b/packages/seacas/applications/slice/Slice.C index 1b9eb4c715..be73865ceb 100644 --- a/packages/seacas/applications/slice/Slice.C +++ b/packages/seacas/applications/slice/Slice.C @@ -1405,8 +1405,7 @@ namespace { } if (debug_level & 32) { - Ioss::DecompUtils::output_decomposition_statistics(elem_to_proc, interFace.processor_count(), - elem_to_proc.size()); + Ioss::DecompUtils::output_decomposition_statistics(elem_to_proc, interFace.processor_count()); } if (!create_split_files) { diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index 5e8d5c22fa..b065b56154 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -466,7 +466,7 @@ namespace Ioss { std::vector &elem_to_proc, int proc_count); void DecompUtils::output_decomposition_statistics(const std::vector &elem_to_proc, - int proc_count, size_t number_elements) + int proc_count) { // Output histogram of elements / rank... 
std::vector elem_per_rank(proc_count); @@ -474,6 +474,7 @@ namespace Ioss { elem_per_rank[proc]++; } + size_t number_elements = elem_to_proc.size(); size_t proc_width = Ioss::Utils::number_width(proc_count, false); size_t work_width = Ioss::Utils::number_width(number_elements, true); @@ -490,7 +491,7 @@ namespace Ioss { fmt::group_digits(median), (double)(max_work) / min_work); } if (min_work == max_work) { - fmt::print("\nWork on all processors is {}\n\n", fmt::group_digits(min_work)); + fmt::print("Work on all processors is {}\n\n", fmt::group_digits(min_work)); } else { int max_star = 40; diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h index ccbe838bb9..30b1b35a2c 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.h @@ -24,7 +24,7 @@ namespace Ioss { std::vector &element_to_proc, int proc_count); static void output_decomposition_statistics(const std::vector &element_to_proc, - int proc_count, size_t number_elements); + int proc_count); template static std::vector line_decomp_weights(const Ioss::chain_t &element_chains, diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C index 5e06be4b03..734316cf2c 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DecompositionData.C @@ -292,6 +292,10 @@ namespace Ioex { int status = Ioss::DecompUtils::line_decompose( region, m_processorCount, m_decomposition.m_method, m_decomposition.m_decompExtra, element_to_proc_global, INT(0)); + + if (m_decomposition.m_showHWM || m_decomposition.m_showProgress) { + Ioss::DecompUtils::output_decomposition_statistics(element_to_proc_global, m_processorCount); + } } // Now broadcast the parts of the `element_to_proc_global` // vector to the 
owning ranks in the initial linear From de4c9d29a0ebf973ce252e84e4dceb3b7bf14a9f Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 09:50:38 -0600 Subject: [PATCH 30/33] IOSS: io_shell - add_processor_id_field works for exodus also --- packages/seacas/libraries/ioss/src/main/shell_interface.C | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/main/shell_interface.C b/packages/seacas/libraries/ioss/src/main/shell_interface.C index ce293550ea..02724f4781 100644 --- a/packages/seacas/libraries/ioss/src/main/shell_interface.C +++ b/packages/seacas/libraries/ioss/src/main/shell_interface.C @@ -207,12 +207,10 @@ void IOShell::Interface::enroll_options() "Files are decomposed externally into a file-per-processor in a parallel run.", nullptr); -#if defined(SEACAS_HAVE_CGNS) options_.enroll( "add_processor_id_field", Ioss::GetLongOption::NoValue, - "For CGNS, add a cell-centered field whose value is the processor id of that cell", nullptr); -#endif - + "Add a cell-centered field whose value is the processor id of that cell", nullptr); + options_.enroll("serialize_io_size", Ioss::GetLongOption::MandatoryValue, "Number of processors that can perform simultaneous IO operations in " "a parallel run; 0 to disable", @@ -510,9 +508,7 @@ bool IOShell::Interface::parse_options(int argc, char **argv, int my_processor) } #if defined(SEACAS_HAVE_MPI) -#if defined(SEACAS_HAVE_CGNS) add_processor_id_field = (options_.retrieve("add_processor_id_field") != nullptr); -#endif #if !defined(NO_ZOLTAN_SUPPORT) if (options_.retrieve("rcb") != nullptr) { From beeefb5e38b16b2c445f659cc8c3376dea09902b Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 09:51:12 -0600 Subject: [PATCH 31/33] IOSS: compose output will add a proc_id map to output --- .../ioss/src/exodus/Ioex_ParallelDatabaseIO.C | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git 
a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C index ebe977380c..713464f363 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C @@ -356,6 +356,24 @@ namespace { return total_data_size; } + void add_processor_id_map(Ioss::Region *region) + { + const auto &blocks = region->get_element_blocks(); + for (const auto &block : blocks) { + block->field_add(Ioss::Field("proc_id", block->field_int_type(), "scalar", Ioss::Field::MAP)); + } + } + + template + void output_processor_id_map(Ioss::Region *region, size_t my_element_count, int my_processor, INT /*dummy*/) + { + std::vector proc_id(my_element_count, my_processor); + const auto &blocks = region->get_element_blocks(); + for (const auto &block : blocks) { + block->put_field_data("proc_id", proc_id); + } + } + } // namespace namespace Ioex { @@ -4849,7 +4867,14 @@ namespace Ioex { if (behavior != Ioss::DB_APPEND && behavior != Ioss::DB_MODIFY) { output_node_map(); + add_processor_id_map(region); output_other_metadata(); + if (int_byte_size_api() == 8) { + output_processor_id_map(region, elementCount, myProcessor, int64_t(0)); + } + else { + output_processor_id_map(region, elementCount, myProcessor, int(0)); + } } } From d815a3c338e727610f3c83488d2ca2c5f590a6c5 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 14:12:34 -0600 Subject: [PATCH 32/33] IOSS: thread-safe output_processor_id_map --- .../ioss/src/exodus/Ioex_ParallelDatabaseIO.C | 24 +++++++++---------- .../ioss/src/exodus/Ioex_ParallelDatabaseIO.h | 2 ++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C index 713464f363..b0d9f4304d 100644 --- 
a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.C @@ -364,16 +364,6 @@ namespace { } } - template - void output_processor_id_map(Ioss::Region *region, size_t my_element_count, int my_processor, INT /*dummy*/) - { - std::vector proc_id(my_element_count, my_processor); - const auto &blocks = region->get_element_blocks(); - for (const auto &block : blocks) { - block->put_field_data("proc_id", proc_id); - } - } - } // namespace namespace Ioex { @@ -4815,6 +4805,16 @@ namespace Ioex { return num_to_get; } + template + void ParallelDatabaseIO::output_processor_id_map(Ioss::Region *region, INT /*dummy*/) + { + std::vector proc_id(elementCount, myProcessor); + const auto &blocks = region->get_element_blocks(); + for (const auto &block : blocks) { + put_field_internal(block, block->get_field("proc_id"), Data(proc_id), -1); + } + } + void ParallelDatabaseIO::write_meta_data(Ioss::IfDatabaseExistsBehavior behavior) { Ioss::Region *region = get_region(); @@ -4870,10 +4870,10 @@ namespace Ioex { add_processor_id_map(region); output_other_metadata(); if (int_byte_size_api() == 8) { - output_processor_id_map(region, elementCount, myProcessor, int64_t(0)); + output_processor_id_map(region, int64_t(0)); } else { - output_processor_id_map(region, elementCount, myProcessor, int(0)); + output_processor_id_map(region, int(0)); } } } diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h index b61bead2ef..2ab679de54 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_ParallelDatabaseIO.h @@ -197,6 +197,8 @@ namespace Ioex { void write_entity_transient_field(const Ioss::Field &field, const Ioss::GroupingEntity *ge, int64_t count, void *variables) const; void write_meta_data(Ioss::IfDatabaseExistsBehavior behavior) 
override; + template + void output_processor_id_map(Ioss::Region *region, INT /*dummy*/); // Read related metadata and store it in the region... void read_region(); From 6986bd22e3152ec62e81a72498b741bb8a1d0828 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 11 Jun 2024 14:12:58 -0600 Subject: [PATCH 33/33] CI: safer variable naming --- cmake-config | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake-config b/cmake-config index b786fc8800..1b9662c3ed 100755 --- a/cmake-config +++ b/cmake-config @@ -442,9 +442,9 @@ if [ "$SANITIZER" != "NO" ] ; then #sanitizer=dataflow #: DataFlowSanitizer, a general data flow analysis. #sanitizer=cfi #: control flow integrity checks. Requires -flto. #sanitizer=safe-stack #: safe stack protection against stack-based memory corruption errors. -SANITIZE="-fsanitize=${SANITIZER} -fno-omit-frame-pointer -fPIC" +OPT_SANITIZE="-fsanitize=${SANITIZER} -fno-omit-frame-pointer -fPIC" if [ "$SANITIZER" == "integer" ] ; then - SANITIZE="$SANITIZE -fno-sanitize=unsigned-integer-overflow" + OPT_SANITIZE="$OPT_SANITIZE -fno-sanitize=unsigned-integer-overflow" fi fi @@ -488,9 +488,9 @@ cmake -G "${GENERATOR}" \ -D CMAKE_CXX_COMPILER:FILEPATH=${CXX} \ -D CMAKE_C_COMPILER:FILEPATH=${CC} \ -D CMAKE_Fortran_COMPILER:FILEPATH=${FC} \ --D CMAKE_CXX_FLAGS="${CXXFLAGS} ${CXX_WARNING_FLAGS} ${SANITIZE}" \ --D CMAKE_C_FLAGS="${CFLAGS} ${C_WARNING_FLAGS} ${SANITIZE}" \ --D CMAKE_Fortran_FLAGS="${FFLAGS} ${F77_WARNING_FLAGS} ${SANITIZE}" \ +-D CMAKE_CXX_FLAGS="${CXXFLAGS} ${CXX_WARNING_FLAGS} ${OPT_SANITIZE}" \ +-D CMAKE_C_FLAGS="${CFLAGS} ${C_WARNING_FLAGS} ${OPT_SANITIZE}" \ +-D CMAKE_Fortran_FLAGS="${FFLAGS} ${F77_WARNING_FLAGS} ${OPT_SANITIZE}" \ -D Seacas_ENABLE_STRONG_C_COMPILE_WARNINGS=${EXTRA_WARNINGS} \ -D Seacas_ENABLE_STRONG_CXX_COMPILE_WARNINGS=${EXTRA_WARNINGS} \ -D CMAKE_INSTALL_RPATH:PATH=${INSTALL_PATH}/lib \