From 3b92f40153f1caa4c5572560dbd73ad525f38d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 26 Jan 2021 12:09:03 +0100 Subject: [PATCH 01/16] core: Remove unused Utils::Mpi::all_gatherv() --- src/utils/include/utils/mpi/all_gatherv.hpp | 100 ------------------- src/utils/tests/CMakeLists.txt | 2 - src/utils/tests/all_gatherv_test.cpp | 101 -------------------- 3 files changed, 203 deletions(-) delete mode 100644 src/utils/include/utils/mpi/all_gatherv.hpp delete mode 100644 src/utils/tests/all_gatherv_test.cpp diff --git a/src/utils/include/utils/mpi/all_gatherv.hpp b/src/utils/include/utils/mpi/all_gatherv.hpp deleted file mode 100644 index 2d746fc779d..00000000000 --- a/src/utils/include/utils/mpi/all_gatherv.hpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2010-2019 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef UTILS_MPI_ALL_GATHERV_HPP -#define UTILS_MPI_ALL_GATHERV_HPP - -#include -#include -#include -#include -#include - -namespace Utils { -namespace Mpi { - -namespace detail { -template -void all_gatherv_impl(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes, - const int *displs, boost::mpl::true_) { - MPI_Datatype type = boost::mpi::get_mpi_datatype(); - - /* in-place ? */ - if (in_values == out_values) { - BOOST_MPI_CHECK_RESULT(MPI_Allgatherv, - (MPI_IN_PLACE, 0, type, out_values, - const_cast(sizes), const_cast(displs), - type, comm)); - } else { - BOOST_MPI_CHECK_RESULT(MPI_Allgatherv, - (const_cast(in_values), in_size, type, - out_values, const_cast(sizes), - const_cast(displs), type, comm)); - } -} - -template -void all_gatherv_impl(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes, - const int *displs, boost::mpl::false_) { - auto const n_nodes = comm.size(); - auto const rank = comm.rank(); - - /* not in-place */ - if (in_values != out_values) { - std::copy_n(in_values, in_size, out_values + displs[rank]); - } - - std::vector req; - for (int i = 0; i < n_nodes; i++) { - if (i != rank) { - req.emplace_back(comm.isend(i, 42, out_values + displs[rank], in_size)); - req.emplace_back(comm.irecv(i, 42, out_values + displs[i], sizes[i])); - } - } - - boost::mpi::wait_all(req.begin(), req.end()); -} -} // namespace detail - -template -void all_gatherv(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes, - const int *displs) { - detail::all_gatherv_impl(comm, in_values, in_size, out_values, sizes, displs, - boost::mpi::is_mpi_datatype()); -} - -template -void all_gatherv(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes) { - std::vector displ(comm.size()); - - int offset = 0; - for (unsigned i = 0; i < displ.size(); i++) { - displ[i] = offset; - offset += sizes[i]; - } - - detail::all_gatherv_impl(comm, in_values, in_size, out_values, sizes, - displ.data(), boost::mpi::is_mpi_datatype()); -} -} // namespace Mpi -} // namespace Utils -#endif diff --git a/src/utils/tests/CMakeLists.txt b/src/utils/tests/CMakeLists.txt index 7ad6fd17cef..90508ce2539 100644 --- a/src/utils/tests/CMakeLists.txt +++ b/src/utils/tests/CMakeLists.txt @@ -74,8 +74,6 @@ unit_test(NAME all_compare_test SRC all_compare_test.cpp DEPENDS EspressoUtils Boost::mpi MPI::MPI_CXX NUM_PROC 3) unit_test(NAME gatherv_test SRC gatherv_test.cpp DEPENDS EspressoUtils Boost::mpi MPI::MPI_CXX NUM_PROC 3) -unit_test(NAME all_gatherv_test SRC all_gatherv_test.cpp DEPENDS EspressoUtils - Boost::mpi MPI::MPI_CXX EspressoUtils NUM_PROC 2) unit_test(NAME sendrecv_test SRC sendrecv_test.cpp DEPENDS EspressoUtils Boost::mpi MPI::MPI_CXX EspressoUtils NUM_PROC 3) unit_test(NAME matrix_test SRC matrix_test.cpp DEPENDS EspressoUtils diff --git a/src/utils/tests/all_gatherv_test.cpp b/src/utils/tests/all_gatherv_test.cpp deleted file mode 100644 index a0685e529ff..00000000000 --- a/src/utils/tests/all_gatherv_test.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2017-2019 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#define BOOST_TEST_NO_MAIN -#define BOOST_TEST_MODULE all_gather test -#define BOOST_TEST_DYN_LINK -#include -#include - -#include -using Utils::Mpi::all_gatherv; - -#include -#include - -namespace mpi = boost::mpi; - -BOOST_AUTO_TEST_CASE(mpi_type) { - mpi::communicator world; - auto const rank = world.rank(); - auto const size = world.size(); - - /* out-of-place */ - { - std::vector out(size, -1); - std::vector sizes(size, 1); - - all_gatherv(world, &rank, 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(i, out.at(i)); - } - } - - /* in-place */ - { - std::vector out(size, -1); - out[rank] = rank; - std::vector sizes(size, 1); - - all_gatherv(world, out.data(), 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(i, out.at(i)); - } - } -} - -BOOST_AUTO_TEST_CASE(non_mpi_type) { - mpi::communicator world; - auto const rank = world.rank(); - auto const size = world.size(); - auto const in = std::to_string(rank); - - /* out-of-place */ - { - std::vector out(size); - std::vector sizes(size, 1); - - all_gatherv(world, &in, 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(std::to_string(i), out.at(i)); - } - } - - /* in-place */ - { - std::vector out(size); - out[rank] = in; - std::vector sizes(size, 1); - - all_gatherv(world, out.data(), 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(std::to_string(i), out.at(i)); - } - } -} - -int main(int argc, char **argv) { - mpi::environment mpi_env(argc, argv); - - return boost::unit_test::unit_test_main(init_unit_test, argc, argv); -} From dc0c997b8b3173715677a5dc31d6d8dbb7fe4db6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 26 Jan 2021 12:11:50 +0100 Subject: [PATCH 02/16] core: Remove unused Utils::realloc() --- src/utils/include/utils/memory.hpp | 32 ------------------------------ 1 file changed, 32 deletions(-) diff --git a/src/utils/include/utils/memory.hpp b/src/utils/include/utils/memory.hpp index 200c456a18b..867104a8faa 100644 --- a/src/utils/include/utils/memory.hpp +++ b/src/utils/include/utils/memory.hpp @@ -21,39 +21,9 @@ #include #include -#include #include namespace Utils { - -/*************************************************************/ -/** \name Dynamic memory allocation. */ -/*************************************************************/ -/**@{*/ - -/* to enable us to make sure that freed pointers are invalidated, we normally - try to use realloc. - Unfortunately allocating zero bytes (which should be avoided) actually - allocates 16 bytes, and - reallocating to 0 also. To avoid this, we use our own malloc and realloc - procedures. */ - -/** used instead of realloc. - Makes sure that resizing to zero FREEs pointer */ -template inline T *realloc(T *old, size_t size) { - if (size == 0) { - ::free(static_cast(old)); - return nullptr; - } - - auto *p = static_cast(::realloc(static_cast(old), size)); - - if (p == nullptr) { - throw std::bad_alloc{}; - } - return p; -} - /** used instead of malloc. Makes sure that a zero size allocation returns a nullptr pointer */ inline void *malloc(size_t size) { @@ -68,8 +38,6 @@ inline void *malloc(size_t size) { } return p; } - -/**@}*/ } // namespace Utils #endif From 63f8c96f78c1fcb1d617dafd14df15c788383275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 26 Jan 2021 12:27:29 +0100 Subject: [PATCH 03/16] core: Use static memory allocation --- src/core/grid_based_algorithms/lb.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 42a047e3b6d..09b5486f2c1 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -45,7 +45,6 @@ #include #include #include -#include #include #include @@ -1019,13 +1018,12 @@ static int compare_buffers(double *buf1, double *buf2, int size) { void lb_check_halo_regions(const LB_Fluid &lb_fluid, const Lattice &lb_lattice) { Lattice::index_t index; - int i, x, y, z, s_node, r_node, count = D3Q19::n_vel; - double *s_buffer, *r_buffer; + int i, x, y, z, s_node, r_node; + constexpr auto count = static_cast(D3Q19::n_vel); + double s_buffer[count]; + double r_buffer[count]; MPI_Status status[2]; - r_buffer = (double *)Utils::malloc(count * sizeof(double)); - s_buffer = (double *)Utils::malloc(count * sizeof(double)); - auto const node_neighbors = calc_node_neighbors(comm_cart); if (box_geo.periodic(0)) { @@ -1215,9 +1213,6 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, } } } - - free(r_buffer); - free(s_buffer); } #endif // ADDITIONAL_CHECKS From 436ce073390216f178a08592b5ce7398e209a090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 26 Jan 2021 13:18:44 +0100 Subject: [PATCH 04/16] core: Remove a global variable in LB GPU code Also fixes -Wshadow. --- src/core/grid_based_algorithms/lbgpu_cuda.cu | 29 +++++++------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu index edc73900c90..9bfa6ae58ad 100644 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ b/src/core/grid_based_algorithms/lbgpu_cuda.cu @@ -97,11 +97,8 @@ LB_node_force_density_gpu node_f = { static float *lb_boundary_force = nullptr; #endif -/** @name pointers for additional cuda check flag */ -/**@{*/ -static int *gpu_check = nullptr; -static int *h_gpu_check = nullptr; -/**@}*/ +/** @brief Whether LB GPU was initialized */ +static int *device_gpu_lb_initialized = nullptr; /** @brief Direction of data transfer between @ref nodes_a and @ref nodes_b * during integration in @ref lb_integrate_GPU @@ -2428,13 +2425,7 @@ void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { /*write parameters in const memory*/ cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - /*check flag if lb gpu init works*/ - free_realloc_and_clear(gpu_check, sizeof(int)); - - if (h_gpu_check != nullptr) - free(h_gpu_check); - - h_gpu_check = (int *)Utils::malloc(sizeof(int)); + free_realloc_and_clear(device_gpu_lb_initialized, sizeof(int)); /* values for the kernel call */ int threads_per_block = 64; @@ -2450,17 +2441,17 @@ void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { * Node_Force array with zero */ KERNELCALL(reinit_node_force, dim_grid, threads_per_block, (node_f)); KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a, - device_rho_v, node_f, gpu_check); + device_rho_v, node_f, device_gpu_lb_initialized); intflag = true; current_nodes = &nodes_a; - h_gpu_check[0] = 0; - cuda_safe_mem( - cudaMemcpy(h_gpu_check, gpu_check, sizeof(int), cudaMemcpyDeviceToHost)); + int host_gpu_lb_initialized = 0; + cuda_safe_mem(cudaMemcpy(&host_gpu_lb_initialized, device_gpu_lb_initialized, + sizeof(int), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); - if (!h_gpu_check[0]) { - fprintf(stderr, "initialization of lb gpu code failed! \n"); + if (!host_gpu_lb_initialized) { + fprintf(stderr, "initialization of LB GPU code failed!\n"); errexit(); } } @@ -2483,7 +2474,7 @@ void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu) { /* calc of velocity densities from given parameters and initialize the * Node_Force array with zero */ KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a, - device_rho_v, node_f, gpu_check); + device_rho_v, node_f, device_gpu_lb_initialized); } #ifdef LB_BOUNDARIES_GPU From 619f938d639421ecb999be1391e713c5fae6033d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 13:05:10 +0100 Subject: [PATCH 05/16] core: Use dynamic memory allocation --- src/core/grid_based_algorithms/halo.cpp | 33 +++++++++---------------- src/core/grid_based_algorithms/halo.hpp | 14 +++++------ 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index a7055e684da..a00ecb50bc5 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -39,8 +39,8 @@ #include /** Primitive fieldtypes and their initializers */ -struct _Fieldtype fieldtype_double = {0, nullptr, nullptr, sizeof(double), 0, - 0, 0, false, nullptr}; +struct _Fieldtype fieldtype_double = {0, {}, {}, sizeof(double), 0, + 0, 0, false, nullptr}; void halo_create_field_vector(int vblocks, int vstride, int vskip, Fieldtype oldtype, Fieldtype *const newtype) { @@ -56,14 +56,9 @@ void halo_create_field_vector(int vblocks, int vstride, int vskip, ntype->extent = oldtype->extent * ((vblocks - 1) * vskip + vstride); - int count = ntype->count = oldtype->count; - ntype->lengths = (int *)Utils::malloc(count * 2 * sizeof(int)); - ntype->disps = (int *)((char *)ntype->lengths + count * sizeof(int)); - - for (int i = 0; i < count; i++) { - ntype->disps[i] = oldtype->disps[i]; - ntype->lengths[i] = oldtype->lengths[i]; - } + ntype->count = oldtype->count; + ntype->lengths = oldtype->lengths; + ntype->disps = oldtype->disps; } void halo_create_field_hvector(int vblocks, int vstride, int vskip, @@ -80,14 +75,9 @@ void halo_create_field_hvector(int vblocks, int vstride, int vskip, ntype->extent = oldtype->extent * vstride + (vblocks - 1) * vskip; - int const count = ntype->count = oldtype->count; - ntype->lengths = (int *)Utils::malloc(count * 2 * sizeof(int)); - ntype->disps = (int *)((char *)ntype->lengths + count * sizeof(int)); - - for (int i = 0; i < count; i++) { - ntype->disps[i] = oldtype->disps[i]; - ntype->lengths[i] = oldtype->lengths[i]; - } + ntype->count = oldtype->count; + ntype->lengths = oldtype->lengths; + ntype->disps = oldtype->disps; } /** Set halo region to a given value @@ -99,15 +89,14 @@ void halo_dtset(char *dest, int value, Fieldtype type) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; auto const vskip = type->vskip; - auto const count = type->count; - int const *const lens = type->lengths; - int const *const disps = type->disps; + auto const &lens = type->lengths; + auto const &disps = type->disps; auto const extent = type->extent; auto const block_size = static_cast(vskip) * static_cast(extent); for (int i = 0; i < vblocks; i++) { for (int j = 0; j < vstride; j++) { - for (int k = 0; k < count; k++) + for (std::size_t k = 0; k < disps.size(); k++) memset(dest + disps[k], value, lens[k]); } dest += block_size; diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 193ee2c6ea4..8ca310fcaf2 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -60,13 +60,13 @@ */ typedef struct _Fieldtype *Fieldtype; struct _Fieldtype { - int count; /**< number of subtypes in fieldtype */ - int *disps; /**< displacements of the subtypes */ - int *lengths; /**< lengths of the subtypes */ - int extent; /**< extent of the complete fieldtype including gaps */ - int vblocks; /**< number of blocks in field vectors */ - int vstride; /**< size of strides in field vectors */ - int vskip; /**< displacement between strides in field vectors */ + int count; /**< number of subtypes in fieldtype */ + std::vector disps; /**< displacements of the subtypes */ + std::vector lengths; /**< lengths of the subtypes */ + int extent; /**< extent of the complete fieldtype including gaps */ + int vblocks; /**< number of blocks in field vectors */ + int vstride; /**< size of strides in field vectors */ + int vskip; /**< displacement between strides in field vectors */ bool vflag; Fieldtype subtype; }; From 59504ea1bdba7d61bc37a4f7b30a1e334e5825be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 13:12:25 +0100 Subject: [PATCH 06/16] core: Fix memory leak --- src/core/grid_based_algorithms/halo.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index a00ecb50bc5..1f23b958d19 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -44,7 +44,9 @@ struct _Fieldtype fieldtype_double = {0, {}, {}, sizeof(double), 0, void halo_create_field_vector(int vblocks, int vstride, int vskip, Fieldtype oldtype, Fieldtype *const newtype) { - + if (*newtype) { + free(*newtype); + } Fieldtype ntype = *newtype = (Fieldtype)Utils::malloc(sizeof(*ntype)); ntype->subtype = oldtype; @@ -63,7 +65,9 @@ void halo_create_field_vector(int vblocks, int vstride, int vskip, void halo_create_field_hvector(int vblocks, int vstride, int vskip, Fieldtype oldtype, Fieldtype *const newtype) { - + if (*newtype) { + free(*newtype); + } Fieldtype ntype = *newtype = (Fieldtype)Utils::malloc(sizeof(*ntype)); ntype->subtype = oldtype; From 504b155346760c524a053a5aecb6cf5ad3f1eb9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 13:23:14 +0100 Subject: [PATCH 07/16] core: Simplify halo constructors --- src/core/grid_based_algorithms/halo.cpp | 51 ++++++++++--------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index 1f23b958d19..09f808edaf6 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -33,7 +33,6 @@ #include "halo.hpp" #include -#include #include #include @@ -45,43 +44,33 @@ struct _Fieldtype fieldtype_double = {0, {}, {}, sizeof(double), 0, void halo_create_field_vector(int vblocks, int vstride, int vskip, Fieldtype oldtype, Fieldtype *const newtype) { if (*newtype) { - free(*newtype); + delete *newtype; } - Fieldtype ntype = *newtype = (Fieldtype)Utils::malloc(sizeof(*ntype)); - - ntype->subtype = oldtype; - ntype->vflag = true; - - ntype->vblocks = vblocks; - ntype->vstride = vstride; - ntype->vskip = vskip; - - ntype->extent = oldtype->extent * ((vblocks - 1) * vskip + vstride); - - ntype->count = oldtype->count; - ntype->lengths = oldtype->lengths; - ntype->disps = oldtype->disps; + *newtype = new _Fieldtype{oldtype->count, + oldtype->disps, + oldtype->lengths, + oldtype->extent * ((vblocks - 1) * vskip + vstride), + vblocks, + vstride, + vskip, + true, + oldtype}; } void halo_create_field_hvector(int vblocks, int vstride, int vskip, Fieldtype oldtype, Fieldtype *const newtype) { if (*newtype) { - free(*newtype); + delete *newtype; } - Fieldtype ntype = *newtype = (Fieldtype)Utils::malloc(sizeof(*ntype)); - - ntype->subtype = oldtype; - ntype->vflag = false; - - ntype->vblocks = vblocks; - ntype->vstride = vstride; - ntype->vskip = vskip; - - ntype->extent = oldtype->extent * vstride + (vblocks - 1) * vskip; - - ntype->count = oldtype->count; - ntype->lengths = oldtype->lengths; - ntype->disps = oldtype->disps; + *newtype = new _Fieldtype{oldtype->count, + oldtype->disps, + oldtype->lengths, + oldtype->extent * vstride + (vblocks - 1) * vskip, + vblocks, + vstride, + vskip, + false, + oldtype}; } /** Set halo region to a given value From bf4ff5ebe3a1ec5239f85e33f2f313ab365f0252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 15:00:49 +0100 Subject: [PATCH 08/16] core: Remove Fieldtype pointer typedef for clarity --- src/core/grid_based_algorithms/halo.cpp | 58 ++++++++++++------------- src/core/grid_based_algorithms/halo.hpp | 15 +++---- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index 09f808edaf6..e8c24f42441 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -38,39 +38,39 @@ #include /** Primitive fieldtypes and their initializers */ -struct _Fieldtype fieldtype_double = {0, {}, {}, sizeof(double), 0, - 0, 0, false, nullptr}; +struct Fieldtype fieldtype_double = {0, {}, {}, sizeof(double), 0, + 0, 0, false, nullptr}; void halo_create_field_vector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *const newtype) { + Fieldtype *oldtype, Fieldtype **const newtype) { if (*newtype) { delete *newtype; } - *newtype = new _Fieldtype{oldtype->count, - oldtype->disps, - oldtype->lengths, - oldtype->extent * ((vblocks - 1) * vskip + vstride), - vblocks, - vstride, - vskip, - true, - oldtype}; + *newtype = new Fieldtype{oldtype->count, + oldtype->disps, + oldtype->lengths, + oldtype->extent * ((vblocks - 1) * vskip + vstride), + vblocks, + vstride, + vskip, + true, + oldtype}; } void halo_create_field_hvector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *const newtype) { + Fieldtype *oldtype, Fieldtype **const newtype) { if (*newtype) { delete *newtype; } - *newtype = new _Fieldtype{oldtype->count, - oldtype->disps, - oldtype->lengths, - oldtype->extent * vstride + (vblocks - 1) * vskip, - vblocks, - vstride, - vskip, - false, - oldtype}; + *newtype = new Fieldtype{oldtype->count, + oldtype->disps, + oldtype->lengths, + oldtype->extent * vstride + (vblocks - 1) * vskip, + vblocks, + vstride, + vskip, + false, + oldtype}; } /** Set halo region to a given value @@ -78,7 +78,7 @@ void halo_create_field_hvector(int vblocks, int vstride, int vskip, * @param value integer value to write into the halo buffer * @param type halo field layout description */ -void halo_dtset(char *dest, int value, Fieldtype type) { +void halo_dtset(char *dest, int value, Fieldtype *type) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; auto const vskip = type->vskip; @@ -96,10 +96,10 @@ void halo_dtset(char *dest, int value, Fieldtype type) { } } -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype type); +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype *type); -void halo_copy_vector(char *r_buffer, char *s_buffer, int count, Fieldtype type, - bool vflag) { +void halo_copy_vector(char *r_buffer, char *s_buffer, int count, + Fieldtype *type, bool vflag) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; @@ -124,7 +124,7 @@ void halo_copy_vector(char *r_buffer, char *s_buffer, int count, Fieldtype type, * @param count amount of data to copy * @param type field layout type */ -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype type) { +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype *type) { if (type->subtype) { halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag); @@ -146,7 +146,7 @@ void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype type) { void prepare_halo_communication(HaloCommunicator *const hc, Lattice const *const lattice, - Fieldtype fieldtype, MPI_Datatype datatype, + Fieldtype *fieldtype, MPI_Datatype datatype, const Utils::Vector3i &local_node_grid) { const auto grid = lattice->grid; @@ -240,7 +240,7 @@ void release_halo_communication(HaloCommunicator *const hc) { void halo_communication(HaloCommunicator const *const hc, char *const base) { - Fieldtype fieldtype; + Fieldtype *fieldtype; MPI_Datatype datatype; MPI_Request request; MPI_Status status; diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 8ca310fcaf2..6cdb105ea45 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -58,8 +58,7 @@ * See \ref halo_create_field_vector and \ref * halo_dtcopy to understand how it works. */ -typedef struct _Fieldtype *Fieldtype; -struct _Fieldtype { +struct Fieldtype { int count; /**< number of subtypes in fieldtype */ std::vector disps; /**< displacements of the subtypes */ std::vector lengths; /**< lengths of the subtypes */ @@ -68,11 +67,11 @@ struct _Fieldtype { int vstride; /**< size of strides in field vectors */ int vskip; /**< displacement between strides in field vectors */ bool vflag; - Fieldtype subtype; + Fieldtype *subtype; }; /** Predefined fieldtypes */ -extern struct _Fieldtype fieldtype_double; +extern struct Fieldtype fieldtype_double; #define FIELDTYPE_DOUBLE (&fieldtype_double) /** Structure describing a Halo region */ @@ -86,7 +85,7 @@ typedef struct { unsigned long s_offset; /**< offset for send buffer */ unsigned long r_offset; /**< offset for receive buffer */ - Fieldtype fieldtype; /**< type layout of the data being exchanged */ + Fieldtype *fieldtype; /**< type layout of the data being exchanged */ MPI_Datatype datatype; /**< MPI datatype of data being communicated */ } HaloInfo; @@ -110,9 +109,9 @@ class HaloCommunicator { * @param[out] newtype newly created fieldtype */ void halo_create_field_vector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *newtype); + Fieldtype *oldtype, Fieldtype **newtype); void halo_create_field_hvector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *newtype); + Fieldtype *oldtype, Fieldtype **newtype); /** Preparation of the halo parallelization scheme. Sets up the * necessary data structures for \ref halo_communication @@ -123,7 +122,7 @@ void halo_create_field_hvector(int vblocks, int vstride, int vskip, * @param local_node_grid Number of nodes in each spatial dimension */ void prepare_halo_communication(HaloCommunicator *hc, Lattice const *lattice, - Fieldtype fieldtype, MPI_Datatype datatype, + Fieldtype *fieldtype, MPI_Datatype datatype, const Utils::Vector3i &local_node_grid); /** Frees data structures associated with a halo communicator From 1f42f5aafa322a9f144498175d6ca938317db091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 15:04:18 +0100 Subject: [PATCH 09/16] core: Remove unnecessary macro --- src/core/grid_based_algorithms/halo.hpp | 1 - src/core/grid_based_algorithms/lb.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 6cdb105ea45..93664cb1466 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -72,7 +72,6 @@ struct Fieldtype { /** Predefined fieldtypes */ extern struct Fieldtype fieldtype_double; -#define FIELDTYPE_DOUBLE (&fieldtype_double) /** Structure describing a Halo region */ typedef struct { diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 09b5486f2c1..88e1ae71943 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -652,7 +652,7 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, * datatypes */ /* prepare the communication for a single velocity */ - prepare_halo_communication(&comm, &lb_lattice, FIELDTYPE_DOUBLE, MPI_DOUBLE, + prepare_halo_communication(&comm, &lb_lattice, &fieldtype_double, MPI_DOUBLE, node_grid); halo_comm.num = comm.num; From 8850a09ba05d291368beea2cf2f5db009331923d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 15:14:44 +0100 Subject: [PATCH 10/16] core: Use dynamic memory allocation --- .../grid_based_algorithms/lb_boundaries.cpp | 2 +- src/core/grid_based_algorithms/lbgpu.hpp | 2 +- src/core/grid_based_algorithms/lbgpu_cuda.cu | 21 +++++++------------ 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/core/grid_based_algorithms/lb_boundaries.cpp b/src/core/grid_based_algorithms/lb_boundaries.cpp index 1df5ca9e854..18f6faf7557 100644 --- a/src/core/grid_based_algorithms/lb_boundaries.cpp +++ b/src/core/grid_based_algorithms/lb_boundaries.cpp @@ -277,7 +277,7 @@ Utils::Vector3d lbboundary_get_force(LBBoundary const *lbb) { std::vector forces(3 * lbboundaries.size()); if (lattice_switch == ActiveLB::GPU) { #if defined(LB_BOUNDARIES_GPU) && defined(CUDA) - lb_gpu_get_boundary_forces(forces.data()); + lb_gpu_get_boundary_forces(forces); #endif } else if (lattice_switch == ActiveLB::CPU) { #if defined(LB_BOUNDARIES) diff --git a/src/core/grid_based_algorithms/lbgpu.hpp b/src/core/grid_based_algorithms/lbgpu.hpp index 857fd23dc4b..f1913e78ccc 100644 --- a/src/core/grid_based_algorithms/lbgpu.hpp +++ b/src/core/grid_based_algorithms/lbgpu.hpp @@ -217,7 +217,7 @@ void lb_set_node_rho_GPU(int single_nodeindex, float host_rho); void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu); void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu); void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu); -void lb_gpu_get_boundary_forces(double *forces); +void lb_gpu_get_boundary_forces(std::vector &forces); void lb_save_checkpoint_GPU(float *host_checkpoint_vd); void lb_load_checkpoint_GPU(float const *host_checkpoint_vd); diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu index 9bfa6ae58ad..5d3818c4d8c 100644 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ b/src/core/grid_based_algorithms/lbgpu_cuda.cu @@ -38,7 +38,6 @@ #include #include -#include #include #include @@ -49,6 +48,7 @@ #include #include +#include #include #include #include @@ -2878,19 +2878,14 @@ void lb_integrate_GPU() { #endif } -void lb_gpu_get_boundary_forces(double *forces) { +void lb_gpu_get_boundary_forces(std::vector &forces) { #ifdef LB_BOUNDARIES_GPU - auto *temp = (float *)Utils::malloc(3 * LBBoundaries::lbboundaries.size() * - sizeof(float)); - cuda_safe_mem( - cudaMemcpy(temp, lb_boundary_force, - 3 * LBBoundaries::lbboundaries.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - - for (int i = 0; i < 3 * LBBoundaries::lbboundaries.size(); i++) { - forces[i] = -(double)temp[i]; - } - free(temp); + std::vector temp(3 * LBBoundaries::lbboundaries.size()); + cuda_safe_mem(cudaMemcpy(temp.data(), lb_boundary_force, + temp.size() * sizeof(float), + cudaMemcpyDeviceToHost)); + std::transform(temp.begin(), temp.end(), forces.begin(), + [](float val) { return -static_cast(val); }); #endif } From 9ebeb43addf49ed49773ee1d5fb9b4a38bcc90b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 27 Jan 2021 15:35:23 +0100 Subject: [PATCH 11/16] core: Cleanup comments --- src/core/grid_based_algorithms/halo.cpp | 2 +- src/core/grid_based_algorithms/lb.cpp | 16 +++++++++------- src/core/grid_based_algorithms/lb.hpp | 10 ++++------ src/core/grid_based_algorithms/lbgpu_cuda.cu | 13 ++++++------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index e8c24f42441..bce9665e48b 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -118,7 +118,7 @@ void halo_copy_vector(char *r_buffer, char *s_buffer, int count, } } -/** Copy lattice data with layout described by fieldtype. +/** Copy lattice data with layout described by @p type. * @param r_buffer data destination * @param s_buffer data source * @param count amount of data to copy diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 88e1ae71943..338759ce9bd 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -175,11 +175,11 @@ using LB_FluidData = boost::multi_array; static LB_FluidData lbfluid_a; static LB_FluidData lbfluid_b; -/** Pointer to the velocity populations of the fluid. - * lbfluid contains pre-collision populations, lbfluid_post - * contains post-collision. +/** Span of the velocity populations of the fluid (pre-collision populations). */ LB_Fluid lbfluid; +/** Span of the velocity populations of the fluid (post-collision populations). + */ LB_Fluid lbfluid_post; std::vector lbfields; @@ -742,7 +742,6 @@ void lb_set_population_from_density_momentum_density_stress( } /**@}*/ -/** Calculation of hydrodynamic modes */ std::array lb_calc_modes(Lattice::index_t index, const LB_Fluid &lb_fluid) { return Utils::matrix_vector_product( @@ -1337,7 +1336,8 @@ void lb_bounce_back(LB_Fluid &lb_fluid, const LB_Parameters &lb_parameters, /** Calculate the local fluid momentum. * The calculation is implemented explicitly for the special case of D3Q19. - * @param[in] index Local lattice site + * @param[in] index Local lattice site + * @param[in] lb_fluid Populations of the fluid * @retval The local fluid momentum. */ Utils::Vector3d lb_calc_local_momentum_density(Lattice::index_t index, @@ -1356,9 +1356,11 @@ Utils::Vector3d lb_calc_local_momentum_density(Lattice::index_t index, lb_fluid[18][index]}}; } -// Statistics in MD units. /** Calculate momentum of the LB fluid. - * \param result Fluid momentum + * @param[out] result Fluid momentum in MD units + * @param[in] lb_parameters LB parameters + * @param[in] lb_fields Hydrodynamic fields of the fluid + * @param[in] lb_lattice The underlying lattice */ void lb_calc_fluid_momentum(double *result, const LB_Parameters &lb_parameters, const std::vector &lb_fields, diff --git a/src/core/grid_based_algorithms/lb.hpp b/src/core/grid_based_algorithms/lb.hpp index f1dcb52894b..5089ec99499 100644 --- a/src/core/grid_based_algorithms/lb.hpp +++ b/src/core/grid_based_algorithms/lb.hpp @@ -144,10 +144,7 @@ void lb_reinit_fluid(std::vector &lb_fields, const LB_Parameters &lb_parameters); void lb_reinit_parameters(LB_Parameters &lb_parameters); -/** Pointer to the velocity populations of the fluid. - * lbfluid contains pre-collision populations, lbfluid_post - * contains post-collision populations - */ + using LB_Fluid = std::array, 19>; extern LB_Fluid lbfluid; @@ -172,7 +169,7 @@ template auto get(const LB_Fluid_Ref &lb_fluid) { } // namespace Utils -/** Pointer to the hydrodynamic fields of the fluid */ +/** Hydrodynamic fields of the fluid */ extern std::vector lbfields; /************************************************************/ @@ -213,7 +210,8 @@ Utils::Vector6d lb_calc_pressure_tensor(std::array const &modes, /** Calculation of hydrodynamic modes. * - * @param index number of the node to calculate the modes for + * @param[in] index Number of the node to calculate the modes for + * @param[in] lb_fluid Populations of the fluid * @retval Array containing the modes. */ std::array lb_calc_modes(Lattice::index_t index, diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu index 5d3818c4d8c..b139077aaa0 100644 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ b/src/core/grid_based_algorithms/lbgpu_cuda.cu @@ -500,7 +500,7 @@ __device__ void relax_modes(Utils::Array &mode, unsigned int index, j[1] = Rho * u_tot[1]; j[2] = Rho * u_tot[2]; - /** equilibrium part of the stress modes (eq13 schiller) */ + /* equilibrium part of the stress modes (eq13 schiller) */ modes_from_pi_eq[0] = ((j[0] * j[0]) + (j[1] * j[1]) + (j[2] * j[2])) / Rho; modes_from_pi_eq[1] = ((j[0] * j[0]) - (j[1] * j[1])) / Rho; @@ -511,7 +511,7 @@ __device__ void relax_modes(Utils::Array &mode, unsigned int index, modes_from_pi_eq[4] = j[0] * j[2] / Rho; modes_from_pi_eq[5] = j[1] * j[2] / Rho; - /** relax the stress modes (eq14 schiller) */ + /* relax the stress modes (eq14 schiller) */ mode[4] = modes_from_pi_eq[0] + para->gamma_bulk * (mode[4] - modes_from_pi_eq[0]); @@ -1365,7 +1365,6 @@ __device__ __inline__ float three_point_polynomial_larger_than_half(float u) { /** * @brief Get velocity of at index. - * */ __device__ __inline__ float3 node_velocity(float rho_eq, LB_nodes_gpu n_a, int index) { @@ -1728,9 +1727,9 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, if (index < para->number_of_nodes) { Utils::Array mode; - /* default values for fields in lattice units */ gpu_check[0] = 1; + /* default values for fields in lattice units */ float Rho = para->rho; Utils::Array v{}; Utils::Array pi = {{Rho * D3Q19::c_sound_sq, 0.0f, @@ -2573,7 +2572,7 @@ void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu) { /** Setup and call particle kernel from the host * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation + * interpolation */ template void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction) { @@ -2972,8 +2971,8 @@ void lb_lbfluid_get_population(const Utils::Vector3i &xyz, /** * @brief Velocity interpolation functor - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation + * @tparam no_of_neighbours The number of neighbours to consider for + * interpolation */ template struct interpolation { LB_nodes_gpu current_nodes_gpu; From e4a4bc0d4d87aff88b05586d0896b66401e781c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 28 Jan 2021 19:44:33 +0100 Subject: [PATCH 12/16] core: Rename struct --- src/core/grid_based_algorithms/halo.cpp | 24 ++++++++++++------------ src/core/grid_based_algorithms/halo.hpp | 14 +++++++------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index bce9665e48b..e39b3af248e 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -38,15 +38,15 @@ #include /** Primitive fieldtypes and their initializers */ -struct Fieldtype fieldtype_double = {0, {}, {}, sizeof(double), 0, - 0, 0, false, nullptr}; +FieldType fieldtype_double = {0, {}, {}, sizeof(double), 0, + 0, 0, false, nullptr}; void halo_create_field_vector(int vblocks, int vstride, int vskip, - Fieldtype *oldtype, Fieldtype **const newtype) { + FieldType *oldtype, FieldType **const newtype) { if (*newtype) { delete *newtype; } - *newtype = new Fieldtype{oldtype->count, + *newtype = new FieldType{oldtype->count, oldtype->disps, oldtype->lengths, oldtype->extent * ((vblocks - 1) * vskip + vstride), @@ -58,11 +58,11 @@ void halo_create_field_vector(int vblocks, int vstride, int vskip, } void halo_create_field_hvector(int vblocks, int vstride, int vskip, - Fieldtype *oldtype, Fieldtype **const newtype) { + FieldType *oldtype, FieldType **const newtype) { if (*newtype) { delete *newtype; } - *newtype = new Fieldtype{oldtype->count, + *newtype = new FieldType{oldtype->count, oldtype->disps, oldtype->lengths, oldtype->extent * vstride + (vblocks - 1) * vskip, @@ -78,7 +78,7 @@ void halo_create_field_hvector(int vblocks, int vstride, int vskip, * @param value integer value to write into the halo buffer * @param type halo field layout description */ -void halo_dtset(char *dest, int value, Fieldtype *type) { +void halo_dtset(char *dest, int value, FieldType *type) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; auto const vskip = type->vskip; @@ -96,10 +96,10 @@ void halo_dtset(char *dest, int value, Fieldtype *type) { } } -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype *type); +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, FieldType *type); void halo_copy_vector(char *r_buffer, char *s_buffer, int count, - Fieldtype *type, bool vflag) { + FieldType *type, bool vflag) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; @@ -124,7 +124,7 @@ void halo_copy_vector(char *r_buffer, char *s_buffer, int count, * @param count amount of data to copy * @param type field layout type */ -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype *type) { +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, FieldType *type) { if (type->subtype) { halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag); @@ -146,7 +146,7 @@ void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype *type) { void prepare_halo_communication(HaloCommunicator *const hc, Lattice const *const lattice, - Fieldtype *fieldtype, MPI_Datatype datatype, + FieldType *fieldtype, MPI_Datatype datatype, const Utils::Vector3i &local_node_grid) { const auto grid = lattice->grid; @@ -240,7 +240,7 @@ void release_halo_communication(HaloCommunicator *const hc) { void halo_communication(HaloCommunicator const *const hc, char *const base) { - Fieldtype *fieldtype; + FieldType *fieldtype; MPI_Datatype datatype; MPI_Request request; MPI_Status status; diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 93664cb1466..d1c755843be 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -58,7 +58,7 @@ * See \ref halo_create_field_vector and \ref * halo_dtcopy to understand how it works. */ -struct Fieldtype { +struct FieldType { int count; /**< number of subtypes in fieldtype */ std::vector disps; /**< displacements of the subtypes */ std::vector lengths; /**< lengths of the subtypes */ @@ -67,11 +67,11 @@ struct Fieldtype { int vstride; /**< size of strides in field vectors */ int vskip; /**< displacement between strides in field vectors */ bool vflag; - Fieldtype *subtype; + FieldType *subtype; }; /** Predefined fieldtypes */ -extern struct Fieldtype fieldtype_double; +extern FieldType fieldtype_double; /** Structure describing a Halo region */ typedef struct { @@ -84,7 +84,7 @@ typedef struct { unsigned long s_offset; /**< offset for send buffer */ unsigned long r_offset; /**< offset for receive buffer */ - Fieldtype *fieldtype; /**< type layout of the data being exchanged */ + FieldType *fieldtype; /**< type layout of the data being exchanged */ MPI_Datatype datatype; /**< MPI datatype of data being communicated */ } HaloInfo; @@ -108,9 +108,9 @@ class HaloCommunicator { * @param[out] newtype newly created fieldtype */ void halo_create_field_vector(int vblocks, int vstride, int vskip, - Fieldtype *oldtype, Fieldtype **newtype); + FieldType *oldtype, FieldType **newtype); void halo_create_field_hvector(int vblocks, int vstride, int vskip, - Fieldtype *oldtype, Fieldtype **newtype); + FieldType *oldtype, FieldType **newtype); /** Preparation of the halo parallelization scheme. Sets up the * necessary data structures for \ref halo_communication @@ -121,7 +121,7 @@ void halo_create_field_hvector(int vblocks, int vstride, int vskip, * @param local_node_grid Number of nodes in each spatial dimension */ void prepare_halo_communication(HaloCommunicator *hc, Lattice const *lattice, - Fieldtype *fieldtype, MPI_Datatype datatype, + FieldType *fieldtype, MPI_Datatype datatype, const Utils::Vector3i &local_node_grid); /** Frees data structures associated with a halo communicator From 73b74f41de2977064d5ba70c1fe05ed005a72d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 28 Jan 2021 20:59:01 +0100 Subject: [PATCH 13/16] core: Use smart pointers --- src/core/grid_based_algorithms/halo.cpp | 59 +++++++------------------ src/core/grid_based_algorithms/halo.hpp | 40 ++++++++--------- src/core/grid_based_algorithms/lb.cpp | 10 ++--- 3 files changed, 39 insertions(+), 70 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index e39b3af248e..cc96ffcf132 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -36,49 +36,18 @@ #include #include +#include -/** Primitive fieldtypes and their initializers */ -FieldType fieldtype_double = {0, {}, {}, sizeof(double), 0, - 0, 0, false, nullptr}; - -void halo_create_field_vector(int vblocks, int vstride, int vskip, - FieldType *oldtype, FieldType **const newtype) { - if (*newtype) { - delete *newtype; - } - *newtype = new FieldType{oldtype->count, - oldtype->disps, - oldtype->lengths, - oldtype->extent * ((vblocks - 1) * vskip + vstride), - vblocks, - vstride, - vskip, - true, - oldtype}; -} - -void halo_create_field_hvector(int vblocks, int vstride, int vskip, - FieldType *oldtype, FieldType **const newtype) { - if (*newtype) { - delete *newtype; - } - *newtype = new FieldType{oldtype->count, - oldtype->disps, - oldtype->lengths, - oldtype->extent * vstride + (vblocks - 1) * vskip, - vblocks, - vstride, - vskip, - false, - oldtype}; -} +/** Predefined fieldtype for double-precision LB */ +static std::shared_ptr fieldtype_double = + std::make_shared(static_cast(sizeof(double))); /** Set halo region to a given value * @param[out] dest pointer to the halo buffer * @param value integer value to write into the halo buffer * @param type halo field layout description */ -void halo_dtset(char *dest, int value, FieldType *type) { +void halo_dtset(char *dest, int value, std::shared_ptr type) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; auto const vskip = type->vskip; @@ -96,10 +65,11 @@ void halo_dtset(char *dest, int value, FieldType *type) { } } -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, FieldType *type); +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, + std::shared_ptr type); void halo_copy_vector(char *r_buffer, char *s_buffer, int count, - FieldType *type, bool vflag) { + std::shared_ptr type, bool vflag) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; @@ -124,7 +94,8 @@ void halo_copy_vector(char *r_buffer, char *s_buffer, int count, * @param count amount of data to copy * @param type field layout type */ -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, FieldType *type) { +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, + std::shared_ptr type) { if (type->subtype) { halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag); @@ -146,7 +117,7 @@ void halo_dtcopy(char *r_buffer, char *s_buffer, int count, FieldType *type) { void prepare_halo_communication(HaloCommunicator *const hc, Lattice const *const lattice, - FieldType *fieldtype, MPI_Datatype datatype, + MPI_Datatype datatype, const Utils::Vector3i &local_node_grid) { const auto grid = lattice->grid; @@ -160,7 +131,7 @@ void prepare_halo_communication(HaloCommunicator *const hc, hc->num = num; hc->halo_info.resize(num); - auto const extent = static_cast(fieldtype->extent); + auto const extent = static_cast(fieldtype_double->extent); auto const node_neighbors = calc_node_neighbors(comm_cart); @@ -196,8 +167,8 @@ void prepare_halo_communication(HaloCommunicator *const hc, hinfo->source_node = node_neighbors[2 * dir + 1 - lr]; hinfo->dest_node = node_neighbors[2 * dir + lr]; - halo_create_field_vector(nblocks, stride, skip, fieldtype, - &hinfo->fieldtype); + hinfo->fieldtype = std::make_shared(nblocks, stride, skip, + true, fieldtype_double); MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo->datatype); MPI_Type_commit(&hinfo->datatype); @@ -240,7 +211,7 @@ void release_halo_communication(HaloCommunicator *const hc) { void halo_communication(HaloCommunicator const *const hc, char *const base) { - FieldType *fieldtype; + std::shared_ptr fieldtype; MPI_Datatype datatype; MPI_Request request; MPI_Status status; diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index d1c755843be..699c5e9d426 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -34,6 +34,7 @@ #include +#include #include /** \name Types of halo communications */ @@ -55,10 +56,22 @@ /** Layout of the lattice data. * The description is similar to MPI datatypes but a bit more compact. - * See \ref halo_create_field_vector and \ref - * halo_dtcopy to understand how it works. */ struct FieldType { + FieldType(int new_extent) + : count(0), disps({}), lengths({}), extent(new_extent), vblocks(0), + vstride(0), vskip(0), vflag(false), subtype(nullptr) {} + FieldType(int new_vblocks, int new_vstride, int new_vskip, bool new_vflag, + std::shared_ptr oldtype) + : count(oldtype->count), disps(oldtype->disps), lengths(oldtype->lengths), + extent(0), vblocks(new_vblocks), vstride(new_vstride), vskip(new_vskip), + vflag(new_vflag), subtype(oldtype) { + if (vflag) { + extent = oldtype->extent * ((vblocks - 1) * vskip + vstride); + } else { + extent = oldtype->extent * vstride + (vblocks - 1) * vskip; + } + } int count; /**< number of subtypes in fieldtype */ std::vector disps; /**< displacements of the subtypes */ std::vector lengths; /**< lengths of the subtypes */ @@ -67,12 +80,9 @@ struct FieldType { int vstride; /**< size of strides in field vectors */ int vskip; /**< displacement between strides in field vectors */ bool vflag; - FieldType *subtype; + std::shared_ptr subtype; }; -/** Predefined fieldtypes */ -extern FieldType fieldtype_double; - /** Structure describing a Halo region */ typedef struct { @@ -84,7 +94,8 @@ typedef struct { unsigned long s_offset; /**< offset for send buffer */ unsigned long r_offset; /**< offset for receive buffer */ - FieldType *fieldtype; /**< type layout of the data being exchanged */ + std::shared_ptr + fieldtype; /**< type layout of the data being exchanged */ MPI_Datatype datatype; /**< MPI datatype of data being communicated */ } HaloInfo; @@ -100,28 +111,15 @@ class HaloCommunicator { std::vector halo_info; /**< set of halo communications */ }; -/** Creates a field vector layout - * @param vblocks number of vector blocks - * @param vstride size of strides in field vector - * @param vskip displacements of strides in field vector - * @param oldtype fieldtype the vector is composed of - * @param[out] newtype newly created fieldtype - */ -void halo_create_field_vector(int vblocks, int vstride, int vskip, - FieldType *oldtype, FieldType **newtype); -void halo_create_field_hvector(int vblocks, int vstride, int vskip, - FieldType *oldtype, FieldType **newtype); - /** Preparation of the halo parallelization scheme. Sets up the * necessary data structures for \ref halo_communication * @param[in,out] hc halo communicator being created * @param[in] lattice lattice the communication is created for - * @param fieldtype field layout of the lattice data * @param datatype MPI datatype for the lattice data * @param local_node_grid Number of nodes in each spatial dimension */ void prepare_halo_communication(HaloCommunicator *hc, Lattice const *lattice, - FieldType *fieldtype, MPI_Datatype datatype, + MPI_Datatype datatype, const Utils::Vector3i &local_node_grid); /** Frees data structures associated with a halo communicator diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 338759ce9bd..25add1be1d8 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include using Utils::get_linear_index; @@ -652,8 +653,7 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, * datatypes */ /* prepare the communication for a single velocity */ - prepare_halo_communication(&comm, &lb_lattice, &fieldtype_double, MPI_DOUBLE, - node_grid); + prepare_halo_communication(&comm, &lb_lattice, MPI_DOUBLE, node_grid); halo_comm.num = comm.num; halo_comm.halo_info.resize(comm.num); @@ -681,10 +681,10 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, comm.halo_info[i].datatype, &hinfo->datatype); MPI_Type_commit(&hinfo->datatype); - halo_create_field_hvector( + hinfo->fieldtype = std::make_shared( D3Q19::n_vel, 1, - static_cast(lb_lattice.halo_grid_volume * sizeof(double)), - comm.halo_info[i].fieldtype, &hinfo->fieldtype); + static_cast(lb_lattice.halo_grid_volume * sizeof(double)), false, + comm.halo_info[i].fieldtype); } release_halo_communication(&comm); From ab18bedfb9d5a434122af5a01c38270895702e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 28 Jan 2021 21:18:03 +0100 Subject: [PATCH 14/16] core: Change global variable type to bool --- src/core/grid_based_algorithms/lbgpu_cuda.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu index b139077aaa0..d472d2de7ac 100644 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ b/src/core/grid_based_algorithms/lbgpu_cuda.cu @@ -58,13 +58,13 @@ extern int this_node; -/** device_rho_v: struct for hydrodynamic fields: this is for internal use +/** struct for hydrodynamic fields: this is for internal use * (i.e. stores values in LB units) and should not used for * printing values */ static LB_rho_v_gpu *device_rho_v = nullptr; -/** print_rho_v_pi: struct for hydrodynamic fields: this is the interface +/** struct for hydrodynamic fields: this is the interface * and stores values in MD units. It should not be used * as an input for any LB calculations. TODO: in the future, * one might want to have several structures for printing @@ -98,7 +98,7 @@ static float *lb_boundary_force = nullptr; #endif /** @brief Whether LB GPU was initialized */ -static int *device_gpu_lb_initialized = nullptr; +static bool *device_gpu_lb_initialized = nullptr; /** @brief Direction of data transfer between @ref nodes_a and @ref nodes_b * during integration in @ref lb_integrate_GPU @@ -1719,7 +1719,7 @@ calc_node_force(Utils::Array const &delta, */ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, LB_node_force_density_gpu node_f, - int *gpu_check) { + bool *gpu_check) { /* TODO: this can handle only a uniform density, something similar, but local, has to be called every time the fields are set by the user ! */ unsigned int index = blockIdx.y * gridDim.x * blockDim.x + @@ -1727,7 +1727,7 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, if (index < para->number_of_nodes) { Utils::Array mode; - gpu_check[0] = 1; + gpu_check[0] = true; /* default values for fields in lattice units */ float Rho = para->rho; @@ -2424,7 +2424,7 @@ void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { /*write parameters in const memory*/ cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - free_realloc_and_clear(device_gpu_lb_initialized, sizeof(int)); + free_realloc_and_clear(device_gpu_lb_initialized, sizeof(bool)); /* values for the kernel call */ int threads_per_block = 64; @@ -2444,9 +2444,9 @@ void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { intflag = true; current_nodes = &nodes_a; - int host_gpu_lb_initialized = 0; + bool host_gpu_lb_initialized = false; cuda_safe_mem(cudaMemcpy(&host_gpu_lb_initialized, device_gpu_lb_initialized, - sizeof(int), cudaMemcpyDeviceToHost)); + sizeof(bool), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); if (!host_gpu_lb_initialized) { From d036f620b3f25ed56e948b287fec91bcd289c0e1 Mon Sep 17 00:00:00 2001 From: Kai Szuttor Date: Fri, 29 Jan 2021 10:59:57 +0100 Subject: [PATCH 15/16] lb: removed some C style code. --- src/core/grid_based_algorithms/halo.cpp | 87 +++++++------- src/core/grid_based_algorithms/halo.hpp | 6 +- src/core/grid_based_algorithms/lb.cpp | 106 +++++++----------- .../grid_based_algorithms/lb_interface.cpp | 2 +- 4 files changed, 89 insertions(+), 112 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index cc96ffcf132..b90be347376 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -115,21 +115,20 @@ void halo_dtcopy(char *r_buffer, char *s_buffer, int count, } } -void prepare_halo_communication(HaloCommunicator *const hc, - Lattice const *const lattice, +void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice, MPI_Datatype datatype, const Utils::Vector3i &local_node_grid) { - const auto grid = lattice->grid; - const auto period = lattice->halo_grid; + const auto &grid = lattice.grid; + const auto &period = lattice.halo_grid; - for (int n = 0; n < hc->num; n++) { - MPI_Type_free(&(hc->halo_info[n].datatype)); + for (int n = 0; n < hc.num; n++) { + MPI_Type_free(&(hc.halo_info[n].datatype)); } int const num = 2 * 3; /* two communications in each space direction */ - hc->num = num; - hc->halo_info.resize(num); + hc.num = num; + hc.halo_info.resize(num); auto const extent = static_cast(fieldtype_double->extent); @@ -139,7 +138,7 @@ void prepare_halo_communication(HaloCommunicator *const hc, for (int dir = 0; dir < 3; dir++) { for (int lr = 0; lr < 2; lr++) { - HaloInfo *hinfo = &(hc->halo_info[cnt]); + HaloInfo &hinfo = hc.halo_info[cnt]; int nblocks = 1; for (int k = dir + 1; k < 3; k++) { @@ -156,46 +155,46 @@ void prepare_halo_communication(HaloCommunicator *const hc, if (lr == 0) { /* send to left, recv from right */ - hinfo->s_offset = extent * static_cast(stride * 1); - hinfo->r_offset = extent * static_cast(stride * (grid[dir] + 1)); + hinfo.s_offset = extent * static_cast(stride * 1); + hinfo.r_offset = extent * static_cast(stride * (grid[dir] + 1)); } else { /* send to right, recv from left */ - hinfo->s_offset = extent * static_cast(stride * grid[dir]); - hinfo->r_offset = extent * static_cast(stride * 0); + hinfo.s_offset = extent * static_cast(stride * grid[dir]); + hinfo.r_offset = extent * static_cast(stride * 0); } - hinfo->source_node = node_neighbors[2 * dir + 1 - lr]; - hinfo->dest_node = node_neighbors[2 * dir + lr]; + hinfo.source_node = node_neighbors[2 * dir + 1 - lr]; + hinfo.dest_node = node_neighbors[2 * dir + lr]; - hinfo->fieldtype = std::make_shared(nblocks, stride, skip, - true, fieldtype_double); + hinfo.fieldtype = std::make_shared(nblocks, stride, skip, true, + fieldtype_double); - MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo->datatype); - MPI_Type_commit(&hinfo->datatype); + MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo.datatype); + MPI_Type_commit(&hinfo.datatype); if (!box_geo.periodic(dir) && (local_geo.boundary()[2 * dir + lr] != 0 || local_geo.boundary()[2 * dir + 1 - lr] != 0)) { if (local_node_grid[dir] == 1) { - hinfo->type = HALO_OPEN; + hinfo.type = HALO_OPEN; } else if (lr == 0) { if (local_geo.boundary()[2 * dir + lr] == 1) { - hinfo->type = HALO_RECV; + hinfo.type = HALO_RECV; } else { - hinfo->type = HALO_SEND; + hinfo.type = HALO_SEND; } } else { if (local_geo.boundary()[2 * dir + lr] == -1) { - hinfo->type = HALO_RECV; + hinfo.type = HALO_RECV; } else { - hinfo->type = HALO_SEND; + hinfo.type = HALO_SEND; } } } else { if (local_node_grid[dir] == 1) { - hc->halo_info[cnt].type = HALO_LOCL; + hc.halo_info[cnt].type = HALO_LOCL; } else { - hc->halo_info[cnt].type = HALO_SENDRECV; + hc.halo_info[cnt].type = HALO_SENDRECV; } } cnt++; @@ -203,44 +202,44 @@ void prepare_halo_communication(HaloCommunicator *const hc, } } -void release_halo_communication(HaloCommunicator *const hc) { - for (int n = 0; n < hc->num; n++) { - MPI_Type_free(&(hc->halo_info[n].datatype)); +void release_halo_communication(HaloCommunicator &hc) { + for (int n = 0; n < hc.num; n++) { + MPI_Type_free(&(hc.halo_info[n].datatype)); } } -void halo_communication(HaloCommunicator const *const hc, char *const base) { +void halo_communication(const HaloCommunicator &hc, char *const base) { std::shared_ptr fieldtype; MPI_Datatype datatype; MPI_Request request; MPI_Status status; - for (int n = 0; n < hc->num; n++) { + for (int n = 0; n < hc.num; n++) { int s_node, r_node; - int comm_type = hc->halo_info[n].type; - char *s_buffer = (char *)base + hc->halo_info[n].s_offset; - char *r_buffer = (char *)base + hc->halo_info[n].r_offset; + int comm_type = hc.halo_info[n].type; + char *s_buffer = (char *)base + hc.halo_info[n].s_offset; + char *r_buffer = (char *)base + hc.halo_info[n].r_offset; switch (comm_type) { case HALO_LOCL: - fieldtype = hc->halo_info[n].fieldtype; + fieldtype = hc.halo_info[n].fieldtype; halo_dtcopy(r_buffer, s_buffer, 1, fieldtype); break; case HALO_SENDRECV: - datatype = hc->halo_info[n].datatype; - s_node = hc->halo_info[n].source_node; - r_node = hc->halo_info[n].dest_node; + datatype = hc.halo_info[n].datatype; + s_node = hc.halo_info[n].source_node; + r_node = hc.halo_info[n].dest_node; MPI_Sendrecv(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, comm_cart, &status); break; case HALO_SEND: - datatype = hc->halo_info[n].datatype; - fieldtype = hc->halo_info[n].fieldtype; - r_node = hc->halo_info[n].dest_node; + datatype = hc.halo_info[n].datatype; + fieldtype = hc.halo_info[n].fieldtype; + r_node = hc.halo_info[n].dest_node; MPI_Isend(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, comm_cart, &request); halo_dtset(r_buffer, 0, fieldtype); @@ -248,15 +247,15 @@ void halo_communication(HaloCommunicator const *const hc, char *const base) { break; case HALO_RECV: - datatype = hc->halo_info[n].datatype; - s_node = hc->halo_info[n].source_node; + datatype = hc.halo_info[n].datatype; + s_node = hc.halo_info[n].source_node; MPI_Irecv(r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, comm_cart, &request); MPI_Wait(&request, &status); break; case HALO_OPEN: - fieldtype = hc->halo_info[n].fieldtype; + fieldtype = hc.halo_info[n].fieldtype; /** \todo this does not work for the n_i - \ */ halo_dtset(r_buffer, 0, fieldtype); break; diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 699c5e9d426..8f61b987bd1 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -118,20 +118,20 @@ class HaloCommunicator { * @param datatype MPI datatype for the lattice data * @param local_node_grid Number of nodes in each spatial dimension */ -void prepare_halo_communication(HaloCommunicator *hc, Lattice const *lattice, +void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice, MPI_Datatype datatype, const Utils::Vector3i &local_node_grid); /** Frees data structures associated with a halo communicator * @param[in,out] hc halo communicator to be released */ -void release_halo_communication(HaloCommunicator *hc); +void release_halo_communication(HaloCommunicator &hc); /** Perform communication according to the parallelization scheme * described by the halo communicator * @param[in] hc halo communicator describing the parallelization scheme * @param[in] base base plane of local node */ -void halo_communication(HaloCommunicator const *hc, char *base); +void halo_communication(const HaloCommunicator &hc, char *constbase); #endif /* HALO_H */ diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 25add1be1d8..6f6edf0bdb5 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -653,20 +653,20 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, * datatypes */ /* prepare the communication for a single velocity */ - prepare_halo_communication(&comm, &lb_lattice, MPI_DOUBLE, node_grid); + prepare_halo_communication(comm, lb_lattice, MPI_DOUBLE, node_grid); halo_comm.num = comm.num; halo_comm.halo_info.resize(comm.num); /* replicate the halo structure */ for (int i = 0; i < comm.num; i++) { - HaloInfo *hinfo = &(halo_comm.halo_info[i]); + HaloInfo &hinfo = halo_comm.halo_info[i]; - hinfo->source_node = comm.halo_info[i].source_node; - hinfo->dest_node = comm.halo_info[i].dest_node; - hinfo->s_offset = comm.halo_info[i].s_offset; - hinfo->r_offset = comm.halo_info[i].r_offset; - hinfo->type = comm.halo_info[i].type; + hinfo.source_node = comm.halo_info[i].source_node; + hinfo.dest_node = comm.halo_info[i].dest_node; + hinfo.s_offset = comm.halo_info[i].s_offset; + hinfo.r_offset = comm.halo_info[i].r_offset; + hinfo.type = comm.halo_info[i].type; /* generate the vector datatype for the structure of lattices we * have to use hvector here because the extent of the subtypes @@ -678,16 +678,16 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, MPI_Type_get_extent(MPI_DOUBLE, &lower, &extent); MPI_Type_create_hvector(D3Q19::n_vel, 1, lb_lattice.halo_grid_volume * extent, - comm.halo_info[i].datatype, &hinfo->datatype); - MPI_Type_commit(&hinfo->datatype); + comm.halo_info[i].datatype, &hinfo.datatype); + MPI_Type_commit(&hinfo.datatype); - hinfo->fieldtype = std::make_shared( + hinfo.fieldtype = std::make_shared( D3Q19::n_vel, 1, static_cast(lb_lattice.halo_grid_volume * sizeof(double)), false, comm.halo_info[i].fieldtype); } - release_halo_communication(&comm); + release_halo_communication(comm); } /***********************************************************************/ @@ -969,7 +969,7 @@ void lb_collide_stream() { /* swap the pointers for old and new population fields */ std::swap(lbfluid, lbfluid_post); - halo_communication(&update_halo_comm, + halo_communication(update_halo_comm, reinterpret_cast(lbfluid[0].data())); #ifdef ADDITIONAL_CHECKS @@ -998,16 +998,13 @@ void lattice_boltzmann_update() { /** \name Coupling part */ /***********************************************************************/ /**@{*/ - -static int compare_buffers(double *buf1, double *buf2, int size) { - int ret; - if (memcmp(buf1, buf2, size) != 0) { +template int compare_buffers(T const &buff_a, T const &buff_b) { + if (not(buff_a == buff_b)) { runtimeErrorMsg() << "Halo buffers are not identical"; - ret = 1; + return 1; } else { - ret = 0; + return 0; } - return ret; } #ifdef ADDITIONAL_CHECKS @@ -1019,9 +1016,8 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, Lattice::index_t index; int i, x, y, z, s_node, r_node; constexpr auto count = static_cast(D3Q19::n_vel); - double s_buffer[count]; - double r_buffer[count]; - MPI_Status status[2]; + std::array s_buffer; + std::array r_buffer; auto const node_neighbors = calc_node_neighbors(comm_cart); @@ -1035,22 +1031,19 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[1]; r_node = node_neighbors[0]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=" << 0 << " at index=" << index << " y=" << y << " z=" << z << "\n"; } @@ -1064,20 +1057,17 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[0]; r_node = node_neighbors[1]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(1, y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(1, y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=0 at index=" << index << " y=" << y << " z=" << z << "\n"; } @@ -1096,22 +1086,19 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[3]; r_node = node_neighbors[2]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=1 at index=" << index << " x=" << x << " z=" << z << "\n"; } @@ -1126,20 +1113,17 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[2]; r_node = node_neighbors[3]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, 1, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, 1, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=1 at index=" << index << " x=" << x << " z=" << z << "\n"; } @@ -1158,22 +1142,19 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[5]; r_node = node_neighbors[4]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=2 at index=" << index << " x=" << x << " y=" << y << " z=" << lb_lattice.grid[2] << "\n"; @@ -1191,20 +1172,17 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[4]; r_node = node_neighbors[5]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, y, 1, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, y, 1, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=2 at index=" << index << " x=" << x << " y=" << y << "\n"; } diff --git a/src/core/grid_based_algorithms/lb_interface.cpp b/src/core/grid_based_algorithms/lb_interface.cpp index a48da0bda33..8e56c66eaeb 100644 --- a/src/core/grid_based_algorithms/lb_interface.cpp +++ b/src/core/grid_based_algorithms/lb_interface.cpp @@ -125,7 +125,7 @@ void lb_lbfluid_sanity_checks() { void lb_lbfluid_on_integration_start() { lb_lbfluid_sanity_checks(); if (lattice_switch == ActiveLB::CPU) { - halo_communication(&update_halo_comm, + halo_communication(update_halo_comm, reinterpret_cast(lbfluid[0].data())); } } From 62d97d104629f2e2405bc9050e72475d39fcad4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Fri, 29 Jan 2021 13:14:48 +0100 Subject: [PATCH 16/16] core: Fix regression and clang-tidy warnings --- src/core/grid_based_algorithms/halo.hpp | 2 +- src/core/grid_based_algorithms/lb.cpp | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 8f61b987bd1..4e0b8d39a96 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -132,6 +132,6 @@ void release_halo_communication(HaloCommunicator &hc); * @param[in] hc halo communicator describing the parallelization scheme * @param[in] base base plane of local node */ -void halo_communication(const HaloCommunicator &hc, char *constbase); +void halo_communication(const HaloCommunicator &hc, char *base); #endif /* HALO_H */ diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 6f6edf0bdb5..309c3447410 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -999,12 +999,11 @@ void lattice_boltzmann_update() { /***********************************************************************/ /**@{*/ template int compare_buffers(T const &buff_a, T const &buff_b) { - if (not(buff_a == buff_b)) { + if (buff_a != buff_b) { runtimeErrorMsg() << "Halo buffers are not identical"; - return 1; - } else { - return 0; + return ES_ERROR; } + return ES_OK; } #ifdef ADDITIONAL_CHECKS @@ -1014,10 +1013,10 @@ template int compare_buffers(T const &buff_a, T const &buff_b) { void lb_check_halo_regions(const LB_Fluid &lb_fluid, const Lattice &lb_lattice) { Lattice::index_t index; - int i, x, y, z, s_node, r_node; - constexpr auto count = static_cast(D3Q19::n_vel); - std::array s_buffer; - std::array r_buffer; + std::size_t i; + int x, y, z, s_node, r_node; + std::array s_buffer; + std::array r_buffer; auto const node_neighbors = calc_node_neighbors(comm_cart);