From 4ee059d955a505ccbc174e7c6260c1988644bbb5 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Thu, 15 Sep 2022 17:28:02 +0100 Subject: [PATCH 01/11] update readme --- README.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9d96df09049..cd37f72eb41 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,6 @@ Solving Environment in C++ and Python. DOLFINx is a new version of DOLFIN and is being actively developed. - ## Documentation Documentation can be viewed at: @@ -28,6 +27,7 @@ Documentation can be viewed at: #### C++ core To build and install the C++ core, in the ``cpp/`` directory, run:: + ```shell mkdir build cd build @@ -39,9 +39,11 @@ make install To install the Python interface, first install the C++ core, and then in the ``python/`` directory run:: + ```shell pip install . ``` + (you may need to use ``pip3``, depending on your system). For detailed instructions, see @@ -60,11 +62,13 @@ Conda is the recommended install method for Mac OS users. Linux users may also u To install the latest stable release of the Python interface, with pyvista support for visualisation, using [conda](https://conda.io): + ```shell conda create -n fenicsx-env conda activate fenicsx-env conda install -c conda-forge fenics-dolfinx mpich pyvista ``` + conda is distributed with [Anaconda](https://www.anaconda.com/) and [Miniconda](https://docs.conda.io/en/latest/miniconda.html). The conda recipe is hosted on @@ -85,6 +89,7 @@ Spack is recommended for building DOLFINx on HPC systems. To build the most recent release using [Spack](https://spack.readthedocs.io/) (assuming a bash-compatible shell): + ```shell git clone https://github.com/spack/spack.git . ./spack/share/spack/setup-env.sh @@ -93,6 +98,7 @@ spack env activate fenicsx-env spack add py-fenics-dolfinx cflags="-O3" fflags="-O3" spack install ``` + See the Spack [documentation](https://spack.readthedocs.io/) for comprehensive instructions. 
@@ -103,7 +109,8 @@ PPA](https://launchpad.net/~fenics-packages/+archive/ubuntu/fenics) contains binary packages of the FEniCSx components for Ubuntu. To install: -``` + +```shell add-apt-repository ppa:fenics-packages/fenics apt update apt install fenicsx @@ -122,34 +129,40 @@ of Debian. Install with `apt-get install fenicsx`. #### Docker images A Docker image with the latest stable release of DOLFINx: + ```shell docker run -ti dolfinx/dolfinx:stable ``` To switch between real and complex builds of DOLFINx/PETSc. + ```shell source /usr/local/bin/dolfinx-complex-mode source /usr/local/bin/dolfinx-real-mode ``` A Jupyter Lab environment with the latest stable release of DOLFINx: + ```shell docker run --init -ti -p 8888:8888 dolfinx/lab:stable # Access at http://localhost:8888 ``` A Docker image with DOLFINx built nightly: + ```shell docker run -ti dolfinx/dolfinx:nightly ``` A development image with all of the dependencies required to build the latest stable release of the FEniCSx components: + ```shell docker run -ti dolfinx/dev-env:stable ``` A development image with all of the dependencies required to build the `main` branch of the FEniCSx components: + ```shell docker run -ti dolfinx/dev-env:nightly ``` @@ -180,7 +193,6 @@ You should have received a copy of the GNU Lesser General Public License along with DOLFINx. If not, see . 
- ## Contact For questions about using DOLFINx, visit the FEniCS Discourse page: From 044c26ff6ea4759372183cf27896c3d55d01c567 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Fri, 23 Sep 2022 14:37:58 +0100 Subject: [PATCH 02/11] improve mpi error handling --- README.md | 17 +++++++++-------- cpp/dolfinx/common/IndexMap.cpp | 25 +++++++++++++++---------- cpp/dolfinx/common/MPI.cpp | 13 ++++++++++++- cpp/dolfinx/common/MPI.h | 6 ++++++ cpp/dolfinx/mesh/graphbuild.cpp | 2 +- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index cd37f72eb41..5e06ea51606 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ DOLFINx is a new version of DOLFIN and is being actively developed. Documentation can be viewed at: -- https://docs.fenicsproject.org/dolfinx/main/cpp/ -- https://docs.fenicsproject.org/dolfinx/main/python/ +- <https://docs.fenicsproject.org/dolfinx/main/cpp/> +- <https://docs.fenicsproject.org/dolfinx/main/python/> ## Installation @@ -47,11 +47,12 @@ pip install . (you may need to use ``pip3``, depending on your system). For detailed instructions, see -https://docs.fenicsproject.org/dolfinx/main/python/installation. +<https://docs.fenicsproject.org/dolfinx/main/python/installation>. ### Binary #### Operating System Recommendations + - Mac OS: use [conda](#conda). - Linux: use [apt](#ubuntu-packages) ([Ubuntu](#ubuntu-packages)/[Debian](#debian-packages)), [docker](#docker-images) or [conda](#conda). See also [Spack](#spack). - Windows: use [docker](#docker-images), or install Microsoft's [WSL2](https://docs.microsoft.com/en-us/windows/wsl/install) and use [Ubuntu](#ubuntu-packages). @@ -170,7 +171,7 @@ docker run -ti dolfinx/dev-env:nightly All Docker images support arm64 and amd64 architectures. For a full list of tags, including versioned images, see -https://hub.docker.com/u/dolfinx +<https://hub.docker.com/u/dolfinx> ## Contributing @@ -197,14 +198,14 @@ License along with DOLFINx. 
If not, see For questions about using DOLFINx, visit the FEniCS Discourse page: -https://fenicsproject.discourse.group/ +<https://fenicsproject.discourse.group/> or use the FEniCS Slack channel: -https://fenicsproject.slack.com/ +<https://fenicsproject.slack.com/> -(use https://fenicsproject-slack-invite.herokuapp.com/ to sign up) +(use <https://fenicsproject-slack-invite.herokuapp.com/> to sign up) For bug reports visit: -https://github.com/FEniCS/dolfinx +<https://github.com/FEniCS/dolfinx> diff --git a/cpp/dolfinx/common/IndexMap.cpp b/cpp/dolfinx/common/IndexMap.cpp index 927d20690ac..e25cc41c5e7 100644 --- a/cpp/dolfinx/common/IndexMap.cpp +++ b/cpp/dolfinx/common/IndexMap.cpp @@ -86,16 +86,18 @@ common::compute_owned_indices(const std::span& indices, // Create ghost -> owner comm MPI_Comm comm; - MPI_Dist_graph_create_adjacent(map.comm(), dest.size(), dest.data(), - MPI_UNWEIGHTED, src.size(), src.data(), - MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); + int ierr = MPI_Dist_graph_create_adjacent( + map.comm(), dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), + src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); + dolfinx::MPI::assert_and_throw(ierr); // Exchange number of indices to send/receive from each rank std::vector recv_sizes(dest.size(), 0); send_sizes.reserve(1); recv_sizes.reserve(1); - MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, - MPI_INT, comm); + ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), + 1, MPI_INT, comm); + dolfinx::MPI::assert_and_throw(ierr); // Prepare receive displacement array std::vector recv_disp(dest.size() + 1, 0); @@ -104,11 +106,14 @@ common::compute_owned_indices(const std::span& indices, // Send ghost indices to owner, and receive owned indices std::vector recv_buffer(recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, recv_buffer.data(), - recv_sizes.data(), recv_disp.data(), MPI_INT64_T, - comm); - MPI_Comm_free(&comm); + ierr = MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, + recv_buffer.data(), 
recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, comm); + dolfinx::MPI::assert_and_throw(ierr); + + ierr = MPI_Comm_free(&comm); + dolfinx::MPI::assert_and_throw(ierr); // Remove duplicates from received indices std::sort(recv_buffer.begin(), recv_buffer.end()); diff --git a/cpp/dolfinx/common/MPI.cpp b/cpp/dolfinx/common/MPI.cpp index 8e04903750c..d7d2eb5b2b1 100644 --- a/cpp/dolfinx/common/MPI.cpp +++ b/cpp/dolfinx/common/MPI.cpp @@ -79,7 +79,6 @@ int dolfinx::MPI::rank(const MPI_Comm comm) return rank; } //----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- int dolfinx::MPI::size(const MPI_Comm comm) { int size; @@ -87,6 +86,18 @@ int dolfinx::MPI::size(const MPI_Comm comm) return size; } //----------------------------------------------------------------------------- +void dolfinx::MPI::assert_and_throw(int error_code) +{ + if (error_code != MPI_SUCCESS) + { + int len = MPI_MAX_ERROR_STRING; + std::string error_string(len, ' '); + MPI_Error_string(error_code, error_string.data(), &len); + error_string.resize(len); + throw std::runtime_error(error_string); + } +} +//----------------------------------------------------------------------------- std::vector dolfinx::MPI::compute_graph_edges_pcx(MPI_Comm comm, const std::span& edges) diff --git a/cpp/dolfinx/common/MPI.h b/cpp/dolfinx/common/MPI.h index 2a62fcfe794..ef30e4f0456 100644 --- a/cpp/dolfinx/common/MPI.h +++ b/cpp/dolfinx/common/MPI.h @@ -74,6 +74,12 @@ int rank(MPI_Comm comm); /// communicator int size(MPI_Comm comm); +/// @brief Checks wether an error code returned by an MPI +/// function is equal to MPI_SUCCESS. If the check fails then +/// throw a runtime error. +/// @param error_code Error code returned by an MPI function call. 
+void assert_and_throw(int error_code); + /// @brief Return local range for the calling process, partitioning the /// global [0, N - 1] range across all ranks into partitions of almost /// equal size. diff --git a/cpp/dolfinx/mesh/graphbuild.cpp b/cpp/dolfinx/mesh/graphbuild.cpp index 68099da2a41..4a3ae2f2732 100644 --- a/cpp/dolfinx/mesh/graphbuild.cpp +++ b/cpp/dolfinx/mesh/graphbuild.cpp @@ -91,7 +91,7 @@ graph::AdjacencyList compute_nonlocal_dual_graph( // TODO: Two possible straightforward optimisations: // 1. Do not send owned data to self via MPI. - // 2. Modify MPI::index_owner to use a subet of ranks as post offices. + // 2. Modify MPI::index_owner to use a subset of ranks as post offices. // 3. Find the max buffer row size for the neighbourhood rather than // globally. // From 8c7c804cb43a5574be4fbd69676238922f3ae2b3 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Wed, 28 Sep 2022 17:29:57 +0100 Subject: [PATCH 03/11] update error handling --- cpp/dolfinx/common/IndexMap.cpp | 8 ++++---- cpp/dolfinx/common/MPI.cpp | 6 ++++-- cpp/dolfinx/common/MPI.h | 5 +++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cpp/dolfinx/common/IndexMap.cpp b/cpp/dolfinx/common/IndexMap.cpp index e25cc41c5e7..2fe8d116ef0 100644 --- a/cpp/dolfinx/common/IndexMap.cpp +++ b/cpp/dolfinx/common/IndexMap.cpp @@ -89,7 +89,7 @@ common::compute_owned_indices(const std::span& indices, int ierr = MPI_Dist_graph_create_adjacent( map.comm(), dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); - dolfinx::MPI::assert_and_throw(ierr); + dolfinx::MPI::assert_and_throw(map.comm(), ierr); // Exchange number of indices to send/receive from each rank std::vector recv_sizes(dest.size(), 0); @@ -97,7 +97,7 @@ common::compute_owned_indices(const std::span& indices, recv_sizes.reserve(1); ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, MPI_INT, comm); - dolfinx::MPI::assert_and_throw(ierr); 
+ dolfinx::MPI::assert_and_throw(comm, ierr); // Prepare receive displacement array std::vector recv_disp(dest.size() + 1, 0); @@ -110,10 +110,10 @@ common::compute_owned_indices(const std::span& indices, send_disp.data(), MPI_INT64_T, recv_buffer.data(), recv_sizes.data(), recv_disp.data(), MPI_INT64_T, comm); - dolfinx::MPI::assert_and_throw(ierr); + dolfinx::MPI::assert_and_throw(comm, ierr); ierr = MPI_Comm_free(&comm); - dolfinx::MPI::assert_and_throw(ierr); + dolfinx::MPI::assert_and_throw(comm, ierr); // Remove duplicates from received indices std::sort(recv_buffer.begin(), recv_buffer.end()); diff --git a/cpp/dolfinx/common/MPI.cpp b/cpp/dolfinx/common/MPI.cpp index d7d2eb5b2b1..1717da3a984 100644 --- a/cpp/dolfinx/common/MPI.cpp +++ b/cpp/dolfinx/common/MPI.cpp @@ -82,11 +82,12 @@ int dolfinx::MPI::rank(const MPI_Comm comm) int dolfinx::MPI::size(const MPI_Comm comm) { int size; - MPI_Comm_size(comm, &size); + int ierr = MPI_Comm_size(comm, &size); + dolfinx::MPI::assert_and_throw(comm, ierr); return size; } //----------------------------------------------------------------------------- -void dolfinx::MPI::assert_and_throw(int error_code) +void dolfinx::MPI::assert_and_throw(MPI_Comm comm, int error_code) { if (error_code != MPI_SUCCESS) { @@ -94,6 +95,7 @@ void dolfinx::MPI::assert_and_throw(int error_code) std::string error_string(len, ' '); MPI_Error_string(error_code, error_string.data(), &len); error_string.resize(len); + MPI_Abort(comm, error_code); throw std::runtime_error(error_string); } } diff --git a/cpp/dolfinx/common/MPI.h b/cpp/dolfinx/common/MPI.h index ef30e4f0456..6a7166a0438 100644 --- a/cpp/dolfinx/common/MPI.h +++ b/cpp/dolfinx/common/MPI.h @@ -77,8 +77,9 @@ int size(MPI_Comm comm); /// @brief Checks wether an error code returned by an MPI /// function is equal to MPI_SUCCESS. If the check fails then /// throw a runtime error. -/// @param error_code Error code returned by an MPI function call. 
-void assert_and_throw(int error_code); +/// @param[in] comm MPI communicator +/// @param[in] error_code Error code returned by an MPI function call. +void assert_and_throw(MPI_Comm comm, int error_code); /// @brief Return local range for the calling process, partitioning the /// global [0, N - 1] range across all ranks into partitions of almost From 9f32e9d7a260a8944ef9c874017589a3462bc824 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Wed, 28 Sep 2022 17:56:32 +0100 Subject: [PATCH 04/11] force abortion --- cpp/dolfinx/common/MPI.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/MPI.cpp b/cpp/dolfinx/common/MPI.cpp index 1717da3a984..e7ba602060b 100644 --- a/cpp/dolfinx/common/MPI.cpp +++ b/cpp/dolfinx/common/MPI.cpp @@ -95,8 +95,13 @@ void dolfinx::MPI::assert_and_throw(MPI_Comm comm, int error_code) std::string error_string(len, ' '); MPI_Error_string(error_code, error_string.data(), &len); error_string.resize(len); + + // Output error message + std::cerr << error_string << std::endl; MPI_Abort(comm, error_code); - throw std::runtime_error(error_string); + + // Terminate execution + std::abort(); } } //----------------------------------------------------------------------------- From 0882fa5a8dd73499a11761eff24e4fd25a1212ff Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Sat, 8 Oct 2022 16:00:04 +0100 Subject: [PATCH 05/11] fix name --- cpp/dolfinx/common/IndexMap.cpp | 189 +++++++++++++++++++------------- cpp/dolfinx/common/MPI.cpp | 4 +- cpp/dolfinx/common/MPI.h | 4 +- 3 files changed, 118 insertions(+), 79 deletions(-) diff --git a/cpp/dolfinx/common/IndexMap.cpp b/cpp/dolfinx/common/IndexMap.cpp index 2fe8d116ef0..150a6fe96af 100644 --- a/cpp/dolfinx/common/IndexMap.cpp +++ b/cpp/dolfinx/common/IndexMap.cpp @@ -89,7 +89,7 @@ common::compute_owned_indices(const std::span& indices, int ierr = MPI_Dist_graph_create_adjacent( map.comm(), dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(), 
MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); - dolfinx::MPI::assert_and_throw(map.comm(), ierr); + dolfinx::MPI::check_error(map.comm(), ierr); // Exchange number of indices to send/receive from each rank std::vector recv_sizes(dest.size(), 0); @@ -97,7 +97,7 @@ common::compute_owned_indices(const std::span& indices, recv_sizes.reserve(1); ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, MPI_INT, comm); - dolfinx::MPI::assert_and_throw(comm, ierr); + dolfinx::MPI::check_error(comm, ierr); // Prepare receive displacement array std::vector recv_disp(dest.size() + 1, 0); @@ -110,10 +110,10 @@ common::compute_owned_indices(const std::span& indices, send_disp.data(), MPI_INT64_T, recv_buffer.data(), recv_sizes.data(), recv_disp.data(), MPI_INT64_T, comm); - dolfinx::MPI::assert_and_throw(comm, ierr); + dolfinx::MPI::check_error(comm, ierr); ierr = MPI_Comm_free(&comm); - dolfinx::MPI::assert_and_throw(comm, ierr); + dolfinx::MPI::check_error(comm, ierr); // Remove duplicates from received indices std::sort(recv_buffer.begin(), recv_buffer.end()); @@ -178,12 +178,14 @@ common::stack_index_maps( // Create neighbour comms (0: ghost -> owner, 1: (owner -> ghost) MPI_Comm comm0, comm1; - MPI_Dist_graph_create_adjacent( + int ierr = MPI_Dist_graph_create_adjacent( maps.at(0).first.get().comm(), dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm0); - MPI_Dist_graph_create_adjacent( + dolfinx::MPI::check_error(maps.at(0).first.get().comm(), ierr); + ierr = MPI_Dist_graph_create_adjacent( maps.at(0).first.get().comm(), src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm1); + dolfinx::MPI::check_error(maps.at(0).first.get().comm(), ierr); // NOTE: We could perform each MPI call just once rather than per map, // but the complexity may not be worthwhile since this function is @@ -236,8 +238,9 @@ common::stack_index_maps( std::vector 
recv_sizes(dest.size(), 0); send_sizes.reserve(1); recv_sizes.reserve(1); - MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT32_T, recv_sizes.data(), - 1, MPI_INT32_T, comm0); + ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT32_T, + recv_sizes.data(), 1, MPI_INT32_T, comm0); + dolfinx::MPI::check_error(comm0, ierr); // Prepare displacement vectors std::vector send_disp(src.size() + 1, 0), @@ -249,10 +252,11 @@ common::stack_index_maps( // Send ghost indices to owner, and receive indices std::vector recv_indices(recv_disp.back()); - MPI_Neighbor_alltoallv(send_indices.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, recv_indices.data(), - recv_sizes.data(), recv_disp.data(), MPI_INT64_T, - comm0); + ierr = MPI_Neighbor_alltoallv(send_indices.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, + recv_indices.data(), recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, comm0); + dolfinx::MPI::check_error(comm0, ierr); // For each received index (which I should own), compute its new // index in the concatenated index map @@ -269,10 +273,11 @@ common::stack_index_maps( // Send back/receive new indices std::vector ghosts_new_idx(send_disp.back()); - MPI_Neighbor_alltoallv(ghost_old_to_new.data(), recv_sizes.data(), - recv_disp.data(), MPI_INT64_T, ghosts_new_idx.data(), - send_sizes.data(), send_disp.data(), MPI_INT64_T, - comm1); + ierr = MPI_Neighbor_alltoallv(ghost_old_to_new.data(), recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, + ghosts_new_idx.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, comm1); + dolfinx::MPI::check_error(comm1, ierr); // Unpack new indices and store owner std::vector& ghost_idx = ghosts_new[m]; @@ -295,8 +300,11 @@ common::stack_index_maps( } // Destroy communicators - MPI_Comm_free(&comm0); - MPI_Comm_free(&comm1); + ierr = MPI_Comm_free(&comm0); + dolfinx::MPI::check_error(maps.at(0).first.get().comm(), ierr); + + ierr = MPI_Comm_free(&comm1); + 
dolfinx::MPI::check_error(maps.at(0).first.get().comm(), ierr); return {process_offset, std::move(local_offset), std::move(ghosts_new), std::move(ghost_owners_new)}; @@ -311,19 +319,23 @@ IndexMap::IndexMap(MPI_Comm comm, std::int32_t local_size) std::int64_t offset = 0; const std::int64_t local_size_tmp = local_size; MPI_Request request_scan; - MPI_Iexscan(&local_size_tmp, &offset, 1, MPI_INT64_T, MPI_SUM, comm, - &request_scan); + int ierr = MPI_Iexscan(&local_size_tmp, &offset, 1, MPI_INT64_T, MPI_SUM, + _comm.comm(), &request_scan); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Send local size to sum reduction to get global size MPI_Request request; - MPI_Iallreduce(&local_size_tmp, &_size_global, 1, MPI_INT64_T, MPI_SUM, comm, - &request); + ierr = MPI_Iallreduce(&local_size_tmp, &_size_global, 1, MPI_INT64_T, MPI_SUM, + comm, &request); + dolfinx::MPI::check_error(_comm.comm(), ierr); - MPI_Wait(&request_scan, MPI_STATUS_IGNORE); + ierr = MPI_Wait(&request_scan, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm.comm(), ierr); _local_range = {offset, offset + local_size}; // Wait for the MPI_Iallreduce to complete - MPI_Wait(&request, MPI_STATUS_IGNORE); + ierr = MPI_Wait(&request, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm.comm(), ierr); } //----------------------------------------------------------------------------- IndexMap::IndexMap(MPI_Comm comm, std::int32_t local_size, @@ -350,20 +362,24 @@ IndexMap::IndexMap(MPI_Comm comm, std::int32_t local_size, std::int64_t offset = 0; const std::int64_t local_size_tmp = (std::int64_t)local_size; MPI_Request request_scan; - MPI_Iexscan(&local_size_tmp, &offset, 1, MPI_INT64_T, MPI_SUM, comm, - &request_scan); + int ierr = MPI_Iexscan(&local_size_tmp, &offset, 1, MPI_INT64_T, MPI_SUM, + comm, &request_scan); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Send local size to sum reduction to get global size MPI_Request request; - MPI_Iallreduce(&local_size_tmp, &_size_global, 1, MPI_INT64_T, 
MPI_SUM, comm, - &request); + ierr = MPI_Iallreduce(&local_size_tmp, &_size_global, 1, MPI_INT64_T, MPI_SUM, + comm, &request); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Wait for MPI_Iexscan to complete (get offset) - MPI_Wait(&request_scan, MPI_STATUS_IGNORE); + ierr = MPI_Wait(&request_scan, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm.comm(), ierr); _local_range = {offset, offset + local_size}; // Wait for the MPI_Iallreduce to complete - MPI_Wait(&request, MPI_STATUS_IGNORE); + ierr = MPI_Wait(&request, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm.comm(), ierr); } //----------------------------------------------------------------------------- std::array IndexMap::local_range() const noexcept @@ -459,8 +475,9 @@ IndexMap::create_submap(const std::span& indices) const std::int64_t local_size_new = indices.size(); std::int64_t offset_new = 0; MPI_Request request_offset; - MPI_Iexscan(&local_size_new, &offset_new, 1, MPI_INT64_T, MPI_SUM, - _comm.comm(), &request_offset); + int ierr = MPI_Iexscan(&local_size_new, &offset_new, 1, MPI_INT64_T, MPI_SUM, + _comm.comm(), &request_offset); + dolfinx::MPI::check_error(_comm.comm(), ierr); // --- Step 2: Send ghost indices to owning rank @@ -481,9 +498,10 @@ IndexMap::create_submap(const std::span& indices) const { // Create neighbourhood comm (ghost -> owner) MPI_Comm comm0; - MPI_Dist_graph_create_adjacent( + int ierr = MPI_Dist_graph_create_adjacent( _comm.comm(), dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Pack ghosts indices std::vector> send_data(src.size()); @@ -514,8 +532,9 @@ IndexMap::create_submap(const std::span& indices) const recv_sizes.resize(dest.size(), 0); send_sizes.reserve(1); recv_sizes.reserve(1); - MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT32_T, recv_sizes.data(), - 1, MPI_INT32_T, comm0); + ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, 
MPI_INT32_T, + recv_sizes.data(), 1, MPI_INT32_T, comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Prepare displacement vectors send_disp.resize(src.size() + 1, 0); @@ -527,15 +546,18 @@ IndexMap::create_submap(const std::span& indices) const // Send ghost indices to owner, and receive indices recv_indices.resize(recv_disp.back()); - MPI_Neighbor_alltoallv(send_indices.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, recv_indices.data(), - recv_sizes.data(), recv_disp.data(), MPI_INT64_T, - comm0); - - MPI_Comm_free(&comm0); + ierr = MPI_Neighbor_alltoallv(send_indices.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, + recv_indices.data(), recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); + + ierr = MPI_Comm_free(&comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); } - MPI_Wait(&request_offset, MPI_STATUS_IGNORE); + ierr = MPI_Wait(&request_offset, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm.comm(), ierr); // --- Step 3: Check which received indexes (all of which I should // own) are in the submap @@ -567,17 +589,21 @@ IndexMap::create_submap(const std::span& indices) const // Create neighbourhood comm (owner -> ghost) MPI_Comm comm1; - MPI_Dist_graph_create_adjacent(_comm.comm(), src.size(), src.data(), - MPI_UNWEIGHTED, dest.size(), dest.data(), - MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm1); + ierr = MPI_Dist_graph_create_adjacent( + _comm.comm(), src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), + dest.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm1); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Send index markers to ghosting ranks std::vector recv_gidx(send_disp.back()); - MPI_Neighbor_alltoallv(send_gidx.data(), recv_sizes.data(), recv_disp.data(), - MPI_INT64_T, recv_gidx.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, comm1); + ierr = MPI_Neighbor_alltoallv(send_gidx.data(), recv_sizes.data(), + recv_disp.data(), 
MPI_INT64_T, recv_gidx.data(), + send_sizes.data(), send_disp.data(), + MPI_INT64_T, comm1); + dolfinx::MPI::check_error(_comm.comm(), ierr); - MPI_Comm_free(&comm1); + ierr = MPI_Comm_free(&comm1); + dolfinx::MPI::check_error(_comm.comm(), ierr); // --- Step 5: Unpack received data @@ -661,16 +687,18 @@ graph::AdjacencyList IndexMap::index_to_dest_ranks() const // Create ghost -> owner comm MPI_Comm comm0; - MPI_Dist_graph_create_adjacent( + int ierr = MPI_Dist_graph_create_adjacent( _comm.comm(), dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Exchange number of indices to send/receive from each rank std::vector recv_sizes(dest.size(), 0); send_sizes.reserve(1); recv_sizes.reserve(1); - MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, - MPI_INT, comm0); + ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, + recv_sizes.data(), 1, MPI_INT, comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Prepare receive displacement array std::vector recv_disp(dest.size() + 1, 0); @@ -679,11 +707,13 @@ graph::AdjacencyList IndexMap::index_to_dest_ranks() const // Send ghost indices to owner, and receive owned indices std::vector recv_buffer(recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, recv_buffer.data(), - recv_sizes.data(), recv_disp.data(), MPI_INT64_T, - comm0); - MPI_Comm_free(&comm0); + ierr = MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, + recv_buffer.data(), recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); + ierr = MPI_Comm_free(&comm0); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Build array of (local index, ghosting local rank), and sort for (std::size_t r = 0; r < recv_disp.size() - 1; ++r) @@ -754,17 +784,19 @@ 
graph::AdjacencyList IndexMap::index_to_dest_ranks() const // Create owner -> ghost comm MPI_Comm comm; - MPI_Dist_graph_create_adjacent( + int ierr = MPI_Dist_graph_create_adjacent( _comm.comm(), src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Send how many indices I ghost to each owner, and receive how // many of my indices other ranks ghost std::vector recv_sizes(src.size(), 0); send_sizes.reserve(1); recv_sizes.reserve(1); - MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, - MPI_INT, comm); + ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, + recv_sizes.data(), 1, MPI_INT, comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Prepare displacement vectors std::vector send_disp(dest.size() + 1, 0), @@ -775,11 +807,13 @@ graph::AdjacencyList IndexMap::index_to_dest_ranks() const std::next(recv_disp.begin())); std::vector recv_indices(recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, recv_indices.data(), - recv_sizes.data(), recv_disp.data(), MPI_INT64_T, - comm); - MPI_Comm_free(&comm); + ierr = MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, + recv_indices.data(), recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); + ierr = MPI_Comm_free(&comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Build list of (ghost index, ghost position) pairs for indices // ghosted by this rank, and sort @@ -868,15 +902,17 @@ std::vector IndexMap::shared_indices() const // Create ghost -> owner comm MPI_Comm comm; - MPI_Dist_graph_create_adjacent(_comm.comm(), dest.size(), dest.data(), - MPI_UNWEIGHTED, src.size(), src.data(), - MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); + int ierr = MPI_Dist_graph_create_adjacent( + _comm.comm(), dest.size(), 
dest.data(), MPI_UNWEIGHTED, src.size(), + src.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); std::vector recv_sizes(dest.size(), 0); send_sizes.reserve(1); recv_sizes.reserve(1); - MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, - MPI_INT, comm); + ierr = MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), + 1, MPI_INT, comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); // Prepare receive displacement array std::vector recv_disp(dest.size() + 1, 0); @@ -885,12 +921,15 @@ std::vector IndexMap::shared_indices() const // Send ghost indices to owner, and receive owned indices std::vector recv_buffer(recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), - send_disp.data(), MPI_INT64_T, recv_buffer.data(), - recv_sizes.data(), recv_disp.data(), MPI_INT64_T, - comm); + ierr = MPI_Neighbor_alltoallv(send_buffer.data(), send_sizes.data(), + send_disp.data(), MPI_INT64_T, + recv_buffer.data(), recv_sizes.data(), + recv_disp.data(), MPI_INT64_T, comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); + + ierr = MPI_Comm_free(&comm); + dolfinx::MPI::check_error(_comm.comm(), ierr); - MPI_Comm_free(&comm); std::vector shared; shared.reserve(recv_buffer.size()); diff --git a/cpp/dolfinx/common/MPI.cpp b/cpp/dolfinx/common/MPI.cpp index e7ba602060b..d8fbbb52317 100644 --- a/cpp/dolfinx/common/MPI.cpp +++ b/cpp/dolfinx/common/MPI.cpp @@ -83,11 +83,11 @@ int dolfinx::MPI::size(const MPI_Comm comm) { int size; int ierr = MPI_Comm_size(comm, &size); - dolfinx::MPI::assert_and_throw(comm, ierr); + dolfinx::MPI::check_error(comm, ierr); return size; } //----------------------------------------------------------------------------- -void dolfinx::MPI::assert_and_throw(MPI_Comm comm, int error_code) +void dolfinx::MPI::check_error(MPI_Comm comm, int error_code) { if (error_code != MPI_SUCCESS) { diff --git a/cpp/dolfinx/common/MPI.h 
b/cpp/dolfinx/common/MPI.h index 6a7166a0438..1fddd327ee7 100644 --- a/cpp/dolfinx/common/MPI.h +++ b/cpp/dolfinx/common/MPI.h @@ -76,10 +76,10 @@ int size(MPI_Comm comm); /// @brief Checks wether an error code returned by an MPI /// function is equal to MPI_SUCCESS. If the check fails then -/// throw a runtime error. +/// calls abort. /// @param[in] comm MPI communicator /// @param[in] error_code Error code returned by an MPI function call. -void assert_and_throw(MPI_Comm comm, int error_code); +void check_error(MPI_Comm comm, int error_code); /// @brief Return local range for the calling process, partitioning the /// global [0, N - 1] range across all ranks into partitions of almost From 0e0bb98db5f5ea2944ad3c4fc1149e130c766d60 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Sat, 8 Oct 2022 16:09:05 +0100 Subject: [PATCH 06/11] check all error codes in MPI.cpp --- cpp/dolfinx/common/MPI.cpp | 81 ++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/cpp/dolfinx/common/MPI.cpp b/cpp/dolfinx/common/MPI.cpp index d8fbbb52317..7b21e249b40 100644 --- a/cpp/dolfinx/common/MPI.cpp +++ b/cpp/dolfinx/common/MPI.cpp @@ -15,11 +15,7 @@ dolfinx::MPI::Comm::Comm(MPI_Comm comm, bool duplicate) if (duplicate and comm != MPI_COMM_NULL) { int err = MPI_Comm_dup(comm, &_comm); - if (err != MPI_SUCCESS) - { - throw std::runtime_error( - "Duplication of MPI communicator failed (MPI_Comm_dup)"); - } + dolfinx::MPI::check_error(comm, err); } else _comm = comm; @@ -42,11 +38,7 @@ dolfinx::MPI::Comm::~Comm() if (_comm != MPI_COMM_NULL) { int err = MPI_Comm_free(&_comm); - if (err != MPI_SUCCESS) - { - std::cout << "Error when destroying communicator (MPI_Comm_free)." 
- << std::endl; - } + dolfinx::MPI::check_error(_comm, err); } } //----------------------------------------------------------------------------- @@ -57,11 +49,7 @@ dolfinx::MPI::Comm::operator=(dolfinx::MPI::Comm&& comm) noexcept if (this->_comm != MPI_COMM_NULL) { int err = MPI_Comm_free(&this->_comm); - if (err != MPI_SUCCESS) - { - std::cout << "Error when destroying communicator (MPI_Comm_free)." - << std::endl; - } + dolfinx::MPI::check_error(this->_comm, err); } // Move comm from other object @@ -75,15 +63,16 @@ MPI_Comm dolfinx::MPI::Comm::comm() const noexcept { return _comm; } int dolfinx::MPI::rank(const MPI_Comm comm) { int rank; - MPI_Comm_rank(comm, &rank); + int err = MPI_Comm_rank(comm, &rank); + dolfinx::MPI::check_error(comm, err); return rank; } //----------------------------------------------------------------------------- int dolfinx::MPI::size(const MPI_Comm comm) { int size; - int ierr = MPI_Comm_size(comm, &size); - dolfinx::MPI::check_error(comm, ierr); + int err = MPI_Comm_size(comm, &size); + dolfinx::MPI::check_error(comm, err); return size; } //----------------------------------------------------------------------------- @@ -95,7 +84,7 @@ void dolfinx::MPI::check_error(MPI_Comm comm, int error_code) std::string error_string(len, ' '); MPI_Error_string(error_code, error_string.data(), &len); error_string.resize(len); - + // Output error message std::cerr << error_string << std::endl; MPI_Abort(comm, error_code); @@ -125,34 +114,42 @@ dolfinx::MPI::compute_graph_edges_pcx(MPI_Comm comm, std::vector recvcounts(size, 1); int in_edges = 0; MPI_Request request_scatter; - MPI_Ireduce_scatter(edge_count_send.data(), &in_edges, recvcounts.data(), - MPI_INT, MPI_SUM, comm, &request_scatter); + int err = MPI_Ireduce_scatter(edge_count_send.data(), &in_edges, + recvcounts.data(), MPI_INT, MPI_SUM, comm, + &request_scatter); + dolfinx::MPI::check_error(comm, err); std::vector send_requests(edges.size()); std::byte send_buffer; for (std::size_t e = 0; 
e < edges.size(); ++e) { - MPI_Isend(&send_buffer, 1, MPI_BYTE, edges[e], - static_cast(tag::consensus_pcx), comm, &send_requests[e]); + int err = MPI_Isend(&send_buffer, 1, MPI_BYTE, edges[e], + static_cast(tag::consensus_pcx), comm, + &send_requests[e]); + dolfinx::MPI::check_error(comm, err); } // Probe for incoming messages and store incoming rank - MPI_Wait(&request_scatter, MPI_STATUS_IGNORE); + err = MPI_Wait(&request_scatter, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(comm, err); std::vector other_ranks; while (in_edges > 0) { // Check for message int request_pending; MPI_Status status; - MPI_Iprobe(MPI_ANY_SOURCE, static_cast(tag::consensus_pcx), comm, - &request_pending, &status); + int err = MPI_Iprobe(MPI_ANY_SOURCE, static_cast(tag::consensus_pcx), + comm, &request_pending, &status); + dolfinx::MPI::check_error(comm, err); if (request_pending) { // Receive message and store rank int other_rank = status.MPI_SOURCE; std::byte buffer_recv; - MPI_Recv(&buffer_recv, 1, MPI_BYTE, other_rank, - static_cast(tag::consensus_pcx), comm, MPI_STATUS_IGNORE); + int err = MPI_Recv(&buffer_recv, 1, MPI_BYTE, other_rank, + static_cast(tag::consensus_pcx), comm, + MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(comm, err); other_ranks.push_back(other_rank); --in_edges; } @@ -179,8 +176,10 @@ dolfinx::MPI::compute_graph_edges_nbx(MPI_Comm comm, std::byte send_buffer; for (std::size_t e = 0; e < edges.size(); ++e) { - MPI_Issend(&send_buffer, 1, MPI_BYTE, edges[e], - static_cast(tag::consensus_pex), comm, &send_requests[e]); + int err = MPI_Issend(&send_buffer, 1, MPI_BYTE, edges[e], + static_cast(tag::consensus_pex), comm, + &send_requests[e]); + dolfinx::MPI::check_error(comm, err); } // Vector to hold ranks that send data to this rank @@ -195,8 +194,9 @@ dolfinx::MPI::compute_graph_edges_nbx(MPI_Comm comm, // Check for message int request_pending; MPI_Status status; - MPI_Iprobe(MPI_ANY_SOURCE, static_cast(tag::consensus_pex), comm, - &request_pending, &status); + 
int err = MPI_Iprobe(MPI_ANY_SOURCE, static_cast(tag::consensus_pex), + comm, &request_pending, &status); + dolfinx::MPI::check_error(comm, err); // Check if message is waiting to be processed if (request_pending) @@ -204,8 +204,10 @@ dolfinx::MPI::compute_graph_edges_nbx(MPI_Comm comm, // Receive it int other_rank = status.MPI_SOURCE; std::byte buffer_recv; - MPI_Recv(&buffer_recv, 1, MPI_BYTE, other_rank, - static_cast(tag::consensus_pex), comm, MPI_STATUS_IGNORE); + int err = MPI_Recv(&buffer_recv, 1, MPI_BYTE, other_rank, + static_cast(tag::consensus_pex), comm, + MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(comm, err); other_ranks.push_back(other_rank); } @@ -213,7 +215,8 @@ dolfinx::MPI::compute_graph_edges_nbx(MPI_Comm comm, { // Check for barrier completion int flag = 0; - MPI_Test(&barrier_request, &flag, MPI_STATUS_IGNORE); + int err = MPI_Test(&barrier_request, &flag, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(comm, err); if (flag) comm_complete = true; } @@ -221,12 +224,14 @@ dolfinx::MPI::compute_graph_edges_nbx(MPI_Comm comm, { // Check if all sends have completed int flag = 0; - MPI_Testall(send_requests.size(), send_requests.data(), &flag, - MPI_STATUSES_IGNORE); + int err = MPI_Testall(send_requests.size(), send_requests.data(), &flag, + MPI_STATUSES_IGNORE); + dolfinx::MPI::check_error(comm, err); if (flag) { // All sends have completed, start non-blocking barrier - MPI_Ibarrier(comm, &barrier_request); + int err = MPI_Ibarrier(comm, &barrier_request); + dolfinx::MPI::check_error(comm, err); barrier_active = true; } } From f7ccd131b2cc5e2c0e1ecfcd3f67ff32fa7c30e2 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Sat, 8 Oct 2022 16:33:57 +0100 Subject: [PATCH 07/11] mpi and table files --- cpp/dolfinx/common/MPI.h | 89 +++++++++++++++++++++--------------- cpp/dolfinx/common/Table.cpp | 19 +++++--- cpp/dolfinx/common/utils.h | 11 +++-- 3 files changed, 73 insertions(+), 46 deletions(-) diff --git a/cpp/dolfinx/common/MPI.h 
b/cpp/dolfinx/common/MPI.h index 1fddd327ee7..c0d0e45aa1d 100644 --- a/cpp/dolfinx/common/MPI.h +++ b/cpp/dolfinx/common/MPI.h @@ -378,9 +378,10 @@ distribute_to_postoffice(MPI_Comm comm, const std::span& x, // Create neighbourhood communicator for sending data to post offices MPI_Comm neigh_comm; - MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED, - dest.size(), dest.data(), MPI_UNWEIGHTED, - MPI_INFO_NULL, false, &neigh_comm); + int err = MPI_Dist_graph_create_adjacent( + comm, src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(), + MPI_UNWEIGHTED, MPI_INFO_NULL, false, &neigh_comm); + dolfinx::MPI::check_error(comm, err); // Compute send displacements std::vector send_disp = {0}; @@ -410,8 +411,9 @@ distribute_to_postoffice(MPI_Comm comm, const std::span& x, std::vector num_items_recv(src.size()); num_items_per_dest.reserve(1); num_items_recv.reserve(1); - MPI_Neighbor_alltoall(num_items_per_dest.data(), 1, MPI_INT, - num_items_recv.data(), 1, MPI_INT, neigh_comm); + err = MPI_Neighbor_alltoall(num_items_per_dest.data(), 1, MPI_INT, + num_items_recv.data(), 1, MPI_INT, neigh_comm); + dolfinx::MPI::check_error(comm, err); // Prepare receive displacement and buffers std::vector recv_disp(num_items_recv.size() + 1, 0); @@ -420,22 +422,26 @@ distribute_to_postoffice(MPI_Comm comm, const std::span& x, // Send/receive global indices std::vector recv_buffer_index(recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer_index.data(), num_items_per_dest.data(), - send_disp.data(), MPI_INT64_T, - recv_buffer_index.data(), num_items_recv.data(), - recv_disp.data(), MPI_INT64_T, neigh_comm); + err = MPI_Neighbor_alltoallv( + send_buffer_index.data(), num_items_per_dest.data(), send_disp.data(), + MPI_INT64_T, recv_buffer_index.data(), num_items_recv.data(), + recv_disp.data(), MPI_INT64_T, neigh_comm); + dolfinx::MPI::check_error(comm, err); // Send/receive data (x) MPI_Datatype compound_type; MPI_Type_contiguous(shape[1], 
dolfinx::MPI::mpi_type(), &compound_type); MPI_Type_commit(&compound_type); std::vector recv_buffer_data(shape[1] * recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer_data.data(), num_items_per_dest.data(), - send_disp.data(), compound_type, - recv_buffer_data.data(), num_items_recv.data(), - recv_disp.data(), compound_type, neigh_comm); - MPI_Type_free(&compound_type); - MPI_Comm_free(&neigh_comm); + err = MPI_Neighbor_alltoallv( + send_buffer_data.data(), num_items_per_dest.data(), send_disp.data(), + compound_type, recv_buffer_data.data(), num_items_recv.data(), + recv_disp.data(), compound_type, neigh_comm); + dolfinx::MPI::check_error(comm, err); + err = MPI_Type_free(&compound_type); + dolfinx::MPI::check_error(comm, err); + err = MPI_Comm_free(&neigh_comm); + dolfinx::MPI::check_error(comm, err); LOG(2) << "Completed send data to post offices."; @@ -512,16 +518,18 @@ std::vector distribute_from_postoffice( // Create neighbourhood communicator for sending data to post offices // (src), and receiving data form my send my post office MPI_Comm neigh_comm0; - MPI_Dist_graph_create_adjacent(comm, dest.size(), dest.data(), MPI_UNWEIGHTED, - src.size(), src.data(), MPI_UNWEIGHTED, - MPI_INFO_NULL, false, &neigh_comm0); + int err = MPI_Dist_graph_create_adjacent( + comm, dest.size(), dest.data(), MPI_UNWEIGHTED, src.size(), src.data(), + MPI_UNWEIGHTED, MPI_INFO_NULL, false, &neigh_comm0); + dolfinx::MPI::check_error(comm, err); // Communicate number of requests to each source std::vector num_items_recv(dest.size()); num_items_per_src.reserve(1); num_items_recv.reserve(1); - MPI_Neighbor_alltoall(num_items_per_src.data(), 1, MPI_INT, - num_items_recv.data(), 1, MPI_INT, neigh_comm0); + err = MPI_Neighbor_alltoall(num_items_per_src.data(), 1, MPI_INT, + num_items_recv.data(), 1, MPI_INT, neigh_comm0); + dolfinx::MPI::check_error(comm, err); // Prepare send/receive displacements std::vector send_disp = {0}; @@ -541,12 +549,14 @@ std::vector 
distribute_from_postoffice( // Prepare the receive buffer std::vector recv_buffer_index(recv_disp.back()); - MPI_Neighbor_alltoallv(send_buffer_index.data(), num_items_per_src.data(), - send_disp.data(), MPI_INT64_T, - recv_buffer_index.data(), num_items_recv.data(), - recv_disp.data(), MPI_INT64_T, neigh_comm0); + err = MPI_Neighbor_alltoallv( + send_buffer_index.data(), num_items_per_src.data(), send_disp.data(), + MPI_INT64_T, recv_buffer_index.data(), num_items_recv.data(), + recv_disp.data(), MPI_INT64_T, neigh_comm0); + dolfinx::MPI::check_error(comm, err); - MPI_Comm_free(&neigh_comm0); + err = MPI_Comm_free(&neigh_comm0); + dolfinx::MPI::check_error(comm, err); // 2. Send data (rows of x) back to requesting ranks (transpose of the // preceding communication pattern operation) @@ -593,22 +603,26 @@ std::vector distribute_from_postoffice( } } - MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED, - dest.size(), dest.data(), MPI_UNWEIGHTED, - MPI_INFO_NULL, false, &neigh_comm0); + err = MPI_Dist_graph_create_adjacent( + comm, src.size(), src.data(), MPI_UNWEIGHTED, dest.size(), dest.data(), + MPI_UNWEIGHTED, MPI_INFO_NULL, false, &neigh_comm0); + dolfinx::MPI::check_error(comm, err); MPI_Datatype compound_type0; MPI_Type_contiguous(shape[1], dolfinx::MPI::mpi_type(), &compound_type0); MPI_Type_commit(&compound_type0); std::vector recv_buffer_data(shape[1] * send_disp.back()); - MPI_Neighbor_alltoallv(send_buffer_data.data(), num_items_recv.data(), - recv_disp.data(), compound_type0, - recv_buffer_data.data(), num_items_per_src.data(), - send_disp.data(), compound_type0, neigh_comm0); + err = MPI_Neighbor_alltoallv( + send_buffer_data.data(), num_items_recv.data(), recv_disp.data(), + compound_type0, recv_buffer_data.data(), num_items_per_src.data(), + send_disp.data(), compound_type0, neigh_comm0); + dolfinx::MPI::check_error(comm, err); - MPI_Type_free(&compound_type0); - MPI_Comm_free(&neigh_comm0); + err = 
MPI_Type_free(&compound_type0); + dolfinx::MPI::check_error(comm, err); + err = MPI_Comm_free(&neigh_comm0); + dolfinx::MPI::check_error(comm, err); std::vector index_pos_to_buffer(indices.size(), -1); for (std::size_t i = 0; i < src_to_index.size(); ++i) @@ -661,8 +675,11 @@ std::vector distribute_data(MPI_Comm comm, const std::int64_t shape0_local = x.size() / shape1; std::int64_t shape0(0), rank_offset(0); - MPI_Allreduce(&shape0_local, &shape0, 1, MPI_INT64_T, MPI_SUM, comm); - MPI_Exscan(&shape0_local, &rank_offset, 1, MPI_INT64_T, MPI_SUM, comm); + int err + = MPI_Allreduce(&shape0_local, &shape0, 1, MPI_INT64_T, MPI_SUM, comm); + dolfinx::MPI::check_error(comm, err); + err = MPI_Exscan(&shape0_local, &rank_offset, 1, MPI_INT64_T, MPI_SUM, comm); + dolfinx::MPI::check_error(comm, err); return distribute_from_postoffice(comm, indices, x, {shape0, shape1}, rank_offset); diff --git a/cpp/dolfinx/common/Table.cpp b/cpp/dolfinx/common/Table.cpp index 1a849dbe647..dbaf7f5e70c 100644 --- a/cpp/dolfinx/common/Table.cpp +++ b/cpp/dolfinx/common/Table.cpp @@ -119,11 +119,14 @@ Table Table::reduce(MPI_Comm comm, Table::Reduction reduction) const // Get string data size on each process std::vector pcounts(mpi_size), offsets(mpi_size + 1, 0); const int local_size_str = keys.size(); - MPI_Gather(&local_size_str, 1, MPI_INT, pcounts.data(), 1, MPI_INT, 0, comm); + int err = MPI_Gather(&local_size_str, 1, MPI_INT, pcounts.data(), 1, MPI_INT, + 0, comm); + dolfinx::MPI::check_error(comm, err); std::partial_sum(pcounts.begin(), pcounts.end(), offsets.begin() + 1); std::vector out_str(offsets.back()); - MPI_Gatherv(keys.data(), keys.size(), MPI_CHAR, out_str.data(), - pcounts.data(), offsets.data(), MPI_CHAR, 0, comm); + err = MPI_Gatherv(keys.data(), keys.size(), MPI_CHAR, out_str.data(), + pcounts.data(), offsets.data(), MPI_CHAR, 0, comm); + dolfinx::MPI::check_error(comm, err); // Rebuild string std::vector keys_all(mpi_size); @@ -135,11 +138,15 @@ Table 
Table::reduce(MPI_Comm comm, Table::Reduction reduction) const // Get value data size on each process const int local_size = values.size(); - MPI_Gather(&local_size, 1, MPI_INT, pcounts.data(), 1, MPI_INT, 0, comm); + err = MPI_Gather(&local_size, 1, MPI_INT, pcounts.data(), 1, MPI_INT, 0, + comm); + dolfinx::MPI::check_error(comm, err); std::partial_sum(pcounts.begin(), pcounts.end(), offsets.begin() + 1); + std::vector values_all(offsets.back()); - MPI_Gatherv(values.data(), values.size(), MPI_DOUBLE, values_all.data(), - pcounts.data(), offsets.data(), MPI_DOUBLE, 0, comm); + err = MPI_Gatherv(values.data(), values.size(), MPI_DOUBLE, values_all.data(), + pcounts.data(), offsets.data(), MPI_DOUBLE, 0, comm); + dolfinx::MPI::check_error(comm, err); // Return empty table on rank > 0 if (MPI::rank(comm) > 0) diff --git a/cpp/dolfinx/common/utils.h b/cpp/dolfinx/common/utils.h index 0ecb923d5e4..10b6d6513d6 100644 --- a/cpp/dolfinx/common/utils.h +++ b/cpp/dolfinx/common/utils.h @@ -87,16 +87,19 @@ std::size_t hash_global(MPI_Comm comm, const T& x) // Gather hash keys on root process std::vector all_hashes(dolfinx::MPI::size(comm)); - MPI_Gather(&local_hash, 1, dolfinx::MPI::mpi_type(), - all_hashes.data(), 1, dolfinx::MPI::mpi_type(), 0, - comm); + int err = MPI_Gather(&local_hash, 1, dolfinx::MPI::mpi_type(), + all_hashes.data(), 1, + dolfinx::MPI::mpi_type(), 0, comm); + dolfinx::MPI::check_error(comm, err); // Hash the received hash keys boost::hash> hash; std::size_t global_hash = hash(all_hashes); // Broadcast hash key to all processes - MPI_Bcast(&global_hash, 1, dolfinx::MPI::mpi_type(), 0, comm); + err = MPI_Bcast(&global_hash, 1, dolfinx::MPI::mpi_type(), 0, + comm); + dolfinx::MPI::check_error(comm, err); return global_hash; } From 220805fa9f588f078ebade904d36741165061db5 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Sat, 8 Oct 2022 16:38:32 +0100 Subject: [PATCH 08/11] update scatterer --- cpp/dolfinx/common/Scatterer.cpp | 27 
+++++++++++++++++---------- cpp/dolfinx/common/Scatterer.h | 6 ++++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/common/Scatterer.cpp b/cpp/dolfinx/common/Scatterer.cpp index b444015c7e5..c53326944c0 100644 --- a/cpp/dolfinx/common/Scatterer.cpp +++ b/cpp/dolfinx/common/Scatterer.cpp @@ -33,18 +33,20 @@ Scatterer::Scatterer(const IndexMap& map, int bs) // (0) owner -> ghost, // (1) ghost -> owner MPI_Comm comm0; - MPI_Dist_graph_create_adjacent( + int err = MPI_Dist_graph_create_adjacent( map.comm(), src_ranks.size(), src_ranks.data(), MPI_UNWEIGHTED, dest_ranks.size(), dest_ranks.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm0); _comm0 = dolfinx::MPI::Comm(comm0, false); + dolfinx::MPI::check_error(map.comm(), err); MPI_Comm comm1; - MPI_Dist_graph_create_adjacent( + int err = MPI_Dist_graph_create_adjacent( map.comm(), dest_ranks.size(), dest_ranks.data(), MPI_UNWEIGHTED, src_ranks.size(), src_ranks.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm1); _comm1 = dolfinx::MPI::Comm(comm1, false); + dolfinx::MPI::check_error(map.comm(), err); // Build permutation array that sorts ghost indices by owning rank const std::vector& owners = map.owners(); @@ -91,8 +93,10 @@ Scatterer::Scatterer(const IndexMap& map, int bs) _displs_local.resize(_sizes_local.size() + 1); _sizes_remote.reserve(1); _sizes_local.reserve(1); - MPI_Neighbor_alltoall(_sizes_remote.data(), 1, MPI_INT32_T, - _sizes_local.data(), 1, MPI_INT32_T, _comm1.comm()); + err = MPI_Neighbor_alltoall(_sizes_remote.data(), 1, MPI_INT32_T, + _sizes_local.data(), 1, MPI_INT32_T, + _comm1.comm()); + dolfinx::MPI::check_error(map.comm(), err); std::partial_sum(_sizes_local.begin(), _sizes_local.end(), std::next(_displs_local.begin())); @@ -102,10 +106,11 @@ Scatterer::Scatterer(const IndexMap& map, int bs) // Send ghost global indices to owning rank, and receive owned // indices that are ghosts on other ranks std::vector recv_buffer(_displs_local.back(), 0); - 
MPI_Neighbor_alltoallv(ghosts_sorted.data(), _sizes_remote.data(), - _displs_remote.data(), MPI_INT64_T, - recv_buffer.data(), _sizes_local.data(), - _displs_local.data(), MPI_INT64_T, _comm1.comm()); + err = MPI_Neighbor_alltoallv( + ghosts_sorted.data(), _sizes_remote.data(), _displs_remote.data(), + MPI_INT64_T, recv_buffer.data(), _sizes_local.data(), + _displs_local.data(), MPI_INT64_T, _comm1.comm()); + dolfinx::MPI::check_error(map.comm(), err); const std::array range = map.local_range(); #ifndef NDEBUG @@ -151,7 +156,8 @@ void Scatterer::scatter_fwd_end(MPI_Request& request) const return; // Wait for communication to complete - MPI_Wait(&request, MPI_STATUS_IGNORE); + int err = MPI_Wait(&request, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm0.comm(), err); } //----------------------------------------------------------------------------- void Scatterer::scatter_rev_end(MPI_Request& request) const @@ -161,7 +167,8 @@ void Scatterer::scatter_rev_end(MPI_Request& request) const return; // Wait for communication to complete - MPI_Wait(&request, MPI_STATUS_IGNORE); + int err = MPI_Wait(&request, MPI_STATUS_IGNORE); + dolfinx::MPI::check_error(_comm0.comm(), err); } //----------------------------------------------------------------------------- std::int32_t Scatterer::local_buffer_size() const noexcept diff --git a/cpp/dolfinx/common/Scatterer.h b/cpp/dolfinx/common/Scatterer.h index e0f3ec3488d..d4400e4d9e0 100644 --- a/cpp/dolfinx/common/Scatterer.h +++ b/cpp/dolfinx/common/Scatterer.h @@ -61,10 +61,11 @@ class Scatterer if (_sizes_local.empty() and _sizes_remote.empty()) return; - MPI_Ineighbor_alltoallv( + int err = MPI_Ineighbor_alltoallv( send_buffer.data(), _sizes_local.data(), _displs_local.data(), MPI::mpi_type(), recv_buffer.data(), _sizes_remote.data(), _displs_remote.data(), MPI::mpi_type(), _comm0.comm(), &request); + dolfinx::MPI::check_error(_comm0.comm(), err); } /// @brief Complete a non-blocking send from the local owner to @@ -207,10 
+208,11 @@ class Scatterer return; // Send and receive data - MPI_Ineighbor_alltoallv( + int err = MPI_Ineighbor_alltoallv( send_buffer.data(), _sizes_remote.data(), _displs_remote.data(), MPI::mpi_type(), recv_buffer.data(), _sizes_local.data(), _displs_local.data(), MPI::mpi_type(), _comm1.comm(), &request); + dolfinx::MPI::check_error(_comm1.comm(), err); } /// @brief End the reverse scatter communication. From ca3748c68bfa8f03c4f75cc584ffe10a93b41577 Mon Sep 17 00:00:00 2001 From: IgorBaratta Date: Sat, 8 Oct 2022 23:26:16 +0100 Subject: [PATCH 09/11] fix error --- cpp/dolfinx/common/Scatterer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/Scatterer.cpp b/cpp/dolfinx/common/Scatterer.cpp index c53326944c0..daa808508f3 100644 --- a/cpp/dolfinx/common/Scatterer.cpp +++ b/cpp/dolfinx/common/Scatterer.cpp @@ -41,7 +41,7 @@ Scatterer::Scatterer(const IndexMap& map, int bs) dolfinx::MPI::check_error(map.comm(), err); MPI_Comm comm1; - int err = MPI_Dist_graph_create_adjacent( + err = MPI_Dist_graph_create_adjacent( map.comm(), dest_ranks.size(), dest_ranks.data(), MPI_UNWEIGHTED, src_ranks.size(), src_ranks.data(), MPI_UNWEIGHTED, MPI_INFO_NULL, false, &comm1); From 333e464e76c3c60c63560529f7b5aee977aee600 Mon Sep 17 00:00:00 2001 From: "Garth N. 
Wells" Date: Tue, 11 Oct 2022 10:31:01 +0100 Subject: [PATCH 10/11] Minor docstring edits --- cpp/dolfinx/common/MPI.cpp | 12 +++++------- cpp/dolfinx/common/MPI.h | 9 ++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/common/MPI.cpp b/cpp/dolfinx/common/MPI.cpp index 7b21e249b40..8c120a81bf0 100644 --- a/cpp/dolfinx/common/MPI.cpp +++ b/cpp/dolfinx/common/MPI.cpp @@ -76,20 +76,18 @@ int dolfinx::MPI::size(const MPI_Comm comm) return size; } //----------------------------------------------------------------------------- -void dolfinx::MPI::check_error(MPI_Comm comm, int error_code) +void dolfinx::MPI::check_error(MPI_Comm comm, int code) { - if (error_code != MPI_SUCCESS) + if (code != MPI_SUCCESS) { int len = MPI_MAX_ERROR_STRING; - std::string error_string(len, ' '); - MPI_Error_string(error_code, error_string.data(), &len); + std::string error_string(MPI_MAX_ERROR_STRING, ' '); + MPI_Error_string(code, error_string.data(), &len); error_string.resize(len); - // Output error message std::cerr << error_string << std::endl; - MPI_Abort(comm, error_code); + MPI_Abort(comm, code); - // Terminate execution std::abort(); } } diff --git a/cpp/dolfinx/common/MPI.h b/cpp/dolfinx/common/MPI.h index c0d0e45aa1d..308c7c53441 100644 --- a/cpp/dolfinx/common/MPI.h +++ b/cpp/dolfinx/common/MPI.h @@ -74,12 +74,11 @@ int rank(MPI_Comm comm); /// communicator int size(MPI_Comm comm); -/// @brief Checks wether an error code returned by an MPI -/// function is equal to MPI_SUCCESS. If the check fails then -/// calls abort. +/// @brief Checks wether an error code returned by an MPI function is +/// equal to MPI_SUCCESS. If the check fails then std::abort is called. /// @param[in] comm MPI communicator -/// @param[in] error_code Error code returned by an MPI function call. 
-void check_error(MPI_Comm comm, int error_code); +/// @param[in] code Error code returned by an MPI function call +void check_error(MPI_Comm comm, int code); /// @brief Return local range for the calling process, partitioning the /// global [0, N - 1] range across all ranks into partitions of almost From 2aeca6d74dc3b9089f31e9c34dc4835ff1c8f86e Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 11 Oct 2022 10:33:11 +0100 Subject: [PATCH 11/11] Tweak docstring --- cpp/dolfinx/common/MPI.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/dolfinx/common/MPI.h b/cpp/dolfinx/common/MPI.h index 308c7c53441..c287ee04e1f 100644 --- a/cpp/dolfinx/common/MPI.h +++ b/cpp/dolfinx/common/MPI.h @@ -74,8 +74,8 @@ int rank(MPI_Comm comm); /// communicator int size(MPI_Comm comm); -/// @brief Checks wether an error code returned by an MPI function is -/// equal to MPI_SUCCESS. If the check fails then std::abort is called. +/// @brief Check MPI error code. If the error code is not equal to +/// MPI_SUCCESS, then std::abort is called. /// @param[in] comm MPI communicator /// @param[in] code Error code returned by an MPI function call void check_error(MPI_Comm comm, int code);