From 938a703503aa2309b424deded9671ceaca72fb3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 3 Jul 2024 23:22:07 +0200 Subject: [PATCH 1/2] Extract historic FFT algorithm Encapsulate the FFT algorithm and move its sources to a new folder. Reconcile electrostatics and magnetostatics FFT operations. Hide FFTW3 with type erasure. Alternative FFT backends can now be used. --- src/core/CMakeLists.txt | 4 + src/core/electrostatics/elc.cpp | 2 +- src/core/electrostatics/p3m.cpp | 148 ++++----- src/core/electrostatics/p3m.hpp | 44 +-- src/core/fft/CMakeLists.txt | 24 ++ src/core/{p3m => fft}/fft.cpp | 311 +++++++++--------- src/core/{p3m => fft}/fft.hpp | 194 +++++------ src/core/fft/vector.hpp | 68 ++++ src/core/magnetostatics/dp3m.cpp | 249 ++++++-------- src/core/magnetostatics/dp3m.hpp | 50 +-- src/core/p3m/CMakeLists.txt | 14 +- src/core/p3m/FFTBackendLegacy.cpp | 112 +++++++ src/core/p3m/FFTBackendLegacy.hpp | 78 +++++ src/core/p3m/common.cpp | 6 +- src/core/p3m/common.hpp | 45 +-- src/core/p3m/data_struct.hpp | 82 ++++- src/core/p3m/for_each_3d.hpp | 1 + src/core/p3m/influence_function.hpp | 37 ++- src/core/p3m/interpolation.hpp | 6 +- src/core/p3m/send_mesh.cpp | 37 ++- src/core/p3m/send_mesh.hpp | 38 +-- .../EspressoSystemStandAlone_test.cpp | 2 + src/core/unit_tests/fft_test.cpp | 19 +- .../electrostatics/CoulombP3M.hpp | 2 + .../electrostatics/CoulombP3MGPU.hpp | 2 + .../magnetostatics/DipolarP3M.hpp | 3 + 26 files changed, 937 insertions(+), 641 deletions(-) create mode 100644 src/core/fft/CMakeLists.txt rename src/core/{p3m => fft}/fft.cpp (75%) rename src/core/{p3m => fft}/fft.hpp (54%) create mode 100644 src/core/fft/vector.hpp create mode 100644 src/core/p3m/FFTBackendLegacy.cpp create mode 100644 src/core/p3m/FFTBackendLegacy.hpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6153849cc7a..0e7b82024d1 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -95,6 +95,10 @@ if(ESPRESSO_BUILD_WITH_WALBERLA) $<$:espresso::walberla_cuda>) endif() +if(ESPRESSO_BUILD_WITH_FFTW) + add_subdirectory(fft) +endif() + add_subdirectory(accumulators) add_subdirectory(analysis) add_subdirectory(bond_breakage) diff --git a/src/core/electrostatics/elc.cpp b/src/core/electrostatics/elc.cpp index d7832e5f9ee..6c2c3f05313 100644 --- a/src/core/electrostatics/elc.cpp +++ b/src/core/electrostatics/elc.cpp @@ -1128,7 +1128,7 @@ void charge_assign(elc_data const &elc, CoulombP3M &solver, } /* prepare local FFT mesh */ for (int i = 0; i < solver.p3m.local_mesh.size; i++) - solver.p3m.rs_mesh[i] = 0.; + solver.p3m.mesh.rs_scalar[i] = 0.; for (auto zipped : p_q_pos_range) { auto const p_q = boost::get<0>(zipped); diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index b16cb96f141..5e604a4fd51 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -33,9 +33,9 @@ #include "electrostatics/p3m_gpu.hpp" #include "electrostatics/p3m_gpu_error.hpp" +#include "fft/fft.hpp" #include "p3m/TuningAlgorithm.hpp" #include "p3m/TuningLogger.hpp" -#include "p3m/fft.hpp" #include "p3m/for_each_3d.hpp" #include "p3m/influence_function.hpp" #include "p3m/math.hpp" @@ -106,30 +106,27 @@ void CoulombP3M::count_charged_particles() { /** Calculate the optimal influence function of @cite hockney88a. * (optimised for force calculations) * - * Each node calculates only the values for its domain in k-space - * (see fft.plan[3].mesh and fft.plan[3].start). + * Each node calculates only the values for its domain in k-space. * * See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat * different convention for the prefactors, which is described in * @cite deserno98a @cite deserno98b. */ void CoulombP3M::calc_influence_function_force() { - auto const start = Utils::Vector3i{p3m.fft.plan[3].start}; - auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh}; - - p3m.g_force = grid_influence_function<1>(p3m.params, start, start + size, - get_system().box_geo->length_inv()); + auto const [KX, KY, KZ] = p3m.fft->get_permutations(); + p3m.g_force = + grid_influence_function<1>(p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, + KY, KZ, get_system().box_geo->length_inv()); } /** Calculate the influence function optimized for the energy and the * self energy correction. */ void CoulombP3M::calc_influence_function_energy() { - auto const start = Utils::Vector3i{p3m.fft.plan[3].start}; - auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh}; - - p3m.g_energy = grid_influence_function<0>(p3m.params, start, start + size, - get_system().box_geo->length_inv()); + auto const [KX, KY, KZ] = p3m.fft->get_permutations(); + p3m.g_energy = + grid_influence_function<0>(p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, + KY, KZ, get_system().box_geo->length_inv()); } /** Aliasing sum used by @ref p3m_k_space_error. */ @@ -138,8 +135,8 @@ static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i) { - auto constexpr m_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN); - auto constexpr m_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1); + auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN); + auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1); auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i); auto alias1 = 0.; auto alias2 = 0.; @@ -148,7 +145,7 @@ static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3i nm{}; Utils::Vector3d fnm{}; for_each_3d( - m_start, m_stop, indices, + mesh_start, mesh_stop, indices, [&]() { auto const norm_sq = nm.norm2(); auto const ex = exp(-factor1 * norm_sq); @@ -202,14 +199,14 @@ static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao); auto const mesh_i = 1. / Utils::Vector3d(mesh); auto const alpha_L_i = 1. / alpha_L; - auto const m_stop = mesh / 2; - auto const m_start = -m_stop; + auto const mesh_stop = mesh / 2; + auto const mesh_start = -mesh_stop; auto indices = Utils::Vector3i{}; auto values = Utils::Vector3d{}; auto he_q = 0.; for_each_3d( - m_start, m_stop, indices, + mesh_start, mesh_stop, indices, [&]() { if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) { auto const n2 = indices.norm2(); @@ -253,18 +250,9 @@ void CoulombP3M::init() { elc_layer = actor->elc.space_layer; } + assert(p3m.fft); p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer); - p3m.sm.resize(comm_cart, p3m.local_mesh); - - int ca_mesh_size = fft_init(p3m.local_mesh.dim, p3m.local_mesh.margin, - p3m.params.mesh, p3m.params.mesh_off, p3m.ks_pnum, - p3m.fft, ::communicator.node_grid, comm_cart); - p3m.rs_mesh.resize(ca_mesh_size); - - for (auto &e : p3m.E_mesh) { - e.resize(ca_mesh_size); - } - + p3m.fft->init_fft(); p3m.calc_differential_operator(); /* fix box length dependent constants */ @@ -290,7 +278,7 @@ CoulombP3M::CoulombP3M(P3MParameters &¶meters, double prefactor, namespace { template struct AssignCharge { - void operator()(p3m_data_struct &p3m, double q, + void operator()(decltype(CoulombP3M::p3m) &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights) { auto const w = p3m_calculate_interpolation_weights( @@ -299,21 +287,22 @@ template struct AssignCharge { inter_weights.store(w); p3m_interpolate(p3m.local_mesh, w, [q, &p3m](int ind, double w) { - p3m.rs_mesh[ind] += w * q; + p3m.mesh.rs_scalar[ind] += w * q; }); } - void operator()(p3m_data_struct &p3m, double q, + void operator()(decltype(CoulombP3M::p3m) &p3m, double q, Utils::Vector3d const &real_pos) { p3m_interpolate( p3m.local_mesh, p3m_calculate_interpolation_weights(real_pos, p3m.params.ai, p3m.local_mesh), - [q, &p3m](int ind, double w) { p3m.rs_mesh[ind] += w * q; }); + [q, &p3m](int ind, double w) { p3m.mesh.rs_scalar[ind] += w * q; }); } template - void operator()(p3m_data_struct &p3m, combined_ranges const &p_q_pos_range) { + void operator()(decltype(CoulombP3M::p3m) &p3m, + combined_ranges const &p_q_pos_range) { for (auto zipped : p_q_pos_range) { auto const p_q = boost::get<0>(zipped); auto const &p_pos = boost::get<1>(zipped); @@ -330,7 +319,7 @@ void CoulombP3M::charge_assign(ParticleRange const &particles) { /* prepare local FFT mesh */ for (int i = 0; i < p3m.local_mesh.size; i++) - p3m.rs_mesh[i] = 0.0; + p3m.mesh.rs_scalar[i] = 0.0; auto p_q_range = ParticlePropertyRange::charge_range(particles); auto p_pos_range = ParticlePropertyRange::pos_range(particles); @@ -352,7 +341,7 @@ void CoulombP3M::assign_charge(double q, Utils::Vector3d const &real_pos) { template struct AssignForces { template - void operator()(p3m_data_struct &p3m, double force_prefac, + void operator()(decltype(CoulombP3M::p3m) &p3m, double force_prefac, combined_ranges const &p_q_force_range) const { assert(cao == p3m.inter_weights.cao()); @@ -369,8 +358,9 @@ template struct AssignForces { Utils::Vector3d force{}; p3m_interpolate(p3m.local_mesh, w, [&force, &p3m](int ind, double w) { - force += w * Utils::Vector3d{p3m.E_mesh[0][ind], p3m.E_mesh[1][ind], - p3m.E_mesh[2][ind]}; + force += w * Utils::Vector3d{p3m.mesh.rs_fields[0u][ind], + p3m.mesh.rs_fields[1u][ind], + p3m.mesh.rs_fields[2u][ind]}; }); p_force -= pref * force; @@ -400,36 +390,35 @@ static auto calc_dipole_moment(boost::mpi::communicator const &comm, */ Utils::Vector9d CoulombP3M::long_range_pressure(ParticleRange const &particles) { - using namespace detail::FFT_indexing; - auto const &box_geo = *get_system().box_geo; - Utils::Vector9d node_k_space_pressure_tensor{}; if (p3m.sum_q2 > 0.) { charge_assign(particles); - p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim); - fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart); + p3m.fft->perform_fwd_fft(); - auto constexpr m_start = Utils::Vector3i::broadcast(0); + auto constexpr mesh_start = Utils::Vector3i::broadcast(0); + auto const &mesh_stop = p3m.mesh.size; + auto const &offset = p3m.mesh.start; auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha); - auto const m_stop = std::span(p3m.fft.plan[3].new_mesh); - auto const offset = std::span(p3m.fft.plan[3].start); auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv(); + auto const [KX, KY, KZ] = p3m.fft->get_permutations(); auto indices = Utils::Vector3i{}; auto index = std::size_t(0u); auto diagonal = 0.; - for_each_3d(m_start, m_stop, indices, [&]() { - auto const kx = p3m.d_op[RX][indices[KX] + offset[KX]] * wavevector[RX]; - auto const ky = p3m.d_op[RY][indices[KY] + offset[KY]] * wavevector[RY]; - auto const kz = p3m.d_op[RZ][indices[KZ] + offset[KZ]] * wavevector[RZ]; + for_each_3d(mesh_start, mesh_stop, indices, [&]() { + auto const shift = indices + offset; + auto const kx = p3m.d_op[0u][shift[KX]] * wavevector[0u]; + auto const ky = p3m.d_op[1u][shift[KY]] * wavevector[1u]; + auto const kz = p3m.d_op[2u][shift[KZ]] * wavevector[2u]; auto const norm_sq = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz); if (norm_sq != 0.) { auto const node_k_space_energy = - p3m.g_energy[index] * (Utils::sqr(p3m.rs_mesh[2u * index + 0u]) + - Utils::sqr(p3m.rs_mesh[2u * index + 1u])); + p3m.g_energy[index] * + (Utils::sqr(p3m.mesh.rs_scalar[2u * index + 0u]) + + Utils::sqr(p3m.mesh.rs_scalar[2u * index + 1u])); auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq); auto const pref = node_k_space_energy * vterm; node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */ @@ -470,10 +459,7 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, system.coulomb.impl->solver)) { charge_assign(particles); } - /* Gather information for FFT grid inside the nodes domain (inner local - * mesh) and perform forward 3D FFT (Charge Assignment Mesh). */ - p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim); - fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart); + p3m.fft->perform_fwd_fft(); } auto p_q_range = ParticlePropertyRange::charge_range(particles); @@ -496,45 +482,41 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, /* === k-space force calculation === */ if (force_flag) { /* i*k differentiation */ - auto constexpr m_start = Utils::Vector3i::broadcast(0); - auto const m_stop = std::span(p3m.fft.plan[3].new_mesh); - auto const offset = std::span(p3m.fft.plan[3].start); + auto constexpr mesh_start = Utils::Vector3i::broadcast(0); + auto const &mesh_stop = p3m.mesh.size; + auto const &offset = p3m.mesh.start; auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv(); auto indices = Utils::Vector3i{}; auto index = std::size_t(0u); /* compute electric field */ // Eq. (3.49) @cite deserno00b - for_each_3d(m_start, m_stop, indices, [&]() { - auto const rho_hat = std::complex(p3m.rs_mesh[2u * index + 0u], - p3m.rs_mesh[2u * index + 1u]); + for_each_3d(mesh_start, mesh_stop, indices, [&]() { + auto const rho_hat = + std::complex(p3m.mesh.rs_scalar[2u * index + 0u], + p3m.mesh.rs_scalar[2u * index + 1u]); auto const phi_hat = p3m.g_force[index] * rho_hat; for (int d = 0; d < 3; d++) { /* direction in r-space: */ - int d_rs = (d + p3m.ks_pnum) % 3; + int d_rs = (d + p3m.mesh.ks_pnum) % 3; /* directions */ auto const k = p3m.d_op[d_rs][indices[d] + offset[d]] * wavevector[d_rs]; /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */ - p3m.E_mesh[d_rs][2u * index + 0u] = -k * phi_hat.imag(); - p3m.E_mesh[d_rs][2u * index + 1u] = +k * phi_hat.real(); + p3m.mesh.rs_fields[d_rs][2u * index + 0u] = -k * phi_hat.imag(); + p3m.mesh.rs_fields[d_rs][2u * index + 1u] = +k * phi_hat.real(); } ++index; }); - /* Back FFT force component mesh */ - auto const check_complex = !p3m.params.tuning and check_complex_residuals; - for (int d = 0; d < 3; d++) { - fft_perform_back(p3m.E_mesh[d].data(), check_complex, p3m.fft, comm_cart); - } - - /* redistribute force component mesh */ - std::array E_fields = { - {p3m.E_mesh[0].data(), p3m.E_mesh[1].data(), p3m.E_mesh[2].data()}}; - p3m.sm.spread_grid(E_fields, comm_cart, p3m.local_mesh.dim); + auto const check_residuals = + not p3m.params.tuning and check_complex_residuals; + p3m.fft->check_complex_residuals = check_residuals; + p3m.fft->perform_field_back_fft(); + p3m.fft->check_complex_residuals = false; auto const force_prefac = prefactor / volume; Utils::integral_parameter( @@ -556,11 +538,13 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, /* === k-space energy calculation === */ if (energy_flag or npt_flag) { auto node_energy = 0.; - for (int i = 0; i < p3m.fft.plan[3].new_size; i++) { + auto const mesh_length = Utils::product(p3m.mesh.size); + for (int i = 0; i < mesh_length; i++) { // Use the energy optimized influence function for energy! // Eq. (3.40) @cite deserno00b - node_energy += p3m.g_energy[i] * (Utils::sqr(p3m.rs_mesh[2 * i]) + - Utils::sqr(p3m.rs_mesh[2 * i + 1])); + node_energy += + p3m.g_energy[i] * (Utils::sqr(p3m.mesh.rs_scalar[2 * i]) + + Utils::sqr(p3m.mesh.rs_scalar[2 * i + 1])); } node_energy /= 2. * volume; @@ -596,7 +580,7 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, } class CoulombTuningAlgorithm : public TuningAlgorithm { - p3m_data_struct &p3m; + decltype(CoulombP3M::p3m) &p3m; double m_mesh_density_min = -1., m_mesh_density_max = -1.; // indicates if mesh should be tuned bool m_tune_mesh = false; @@ -605,7 +589,7 @@ class CoulombTuningAlgorithm : public TuningAlgorithm { P3MParameters &get_params() override { return p3m.params; } public: - CoulombTuningAlgorithm(System::System &system, p3m_data_struct &input_p3m, + CoulombTuningAlgorithm(System::System &system, decltype(p3m) &input_p3m, double prefactor, int timings) : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {} diff --git a/src/core/electrostatics/p3m.hpp b/src/core/electrostatics/p3m.hpp index 7f485dca30a..de7bbdee9fa 100644 --- a/src/core/electrostatics/p3m.hpp +++ b/src/core/electrostatics/p3m.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -42,7 +42,6 @@ #include "p3m/common.hpp" #include "p3m/data_struct.hpp" -#include "p3m/fft.hpp" #include "p3m/interpolation.hpp" #include "p3m/send_mesh.hpp" @@ -54,37 +53,26 @@ #include #include #include +#include -struct p3m_data_struct : public p3m_data_struct_base { - explicit p3m_data_struct(P3MParameters &¶meters) - : p3m_data_struct_base{std::move(parameters)} {} - - /** local mesh. */ - P3MLocalMesh local_mesh; - /** real space mesh (local) for CA/FFT. */ - fft_vector rs_mesh; - /** mesh (local) for the electric field. */ - std::array, 3> E_mesh; - - /** number of charged particles (only on head node). */ - int sum_qpart = 0; - /** Sum of square of charges (only on head node). */ - double sum_q2 = 0.; - /** square of sum of charges (only on head node). */ - double square_sum_q = 0.; - - p3m_interpolation_cache inter_weights; +/** @brief P3M solver. */ +struct CoulombP3M : public Coulomb::Actor { + struct p3m_data_struct_impl : public p3m_data_struct { + explicit p3m_data_struct_impl(P3MParameters &¶meters) + : p3m_data_struct{std::move(parameters)} {} - /** send/recv mesh sizes */ - p3m_send_mesh sm; + /** number of charged particles (only on head node). */ + int sum_qpart = 0; + /** Sum of square of charges (only on head node). */ + double sum_q2 = 0.; + /** square of sum of charges (only on head node). */ + double square_sum_q = 0.; - fft_data_struct fft; -}; + p3m_interpolation_cache inter_weights; + }; -/** @brief P3M solver. */ -struct CoulombP3M : public Coulomb::Actor { /** P3M parameters. */ - p3m_data_struct p3m; + p3m_data_struct_impl p3m; int tune_timings; bool tune_verbose; diff --git a/src/core/fft/CMakeLists.txt b/src/core/fft/CMakeLists.txt new file mode 100644 index 00000000000..359cc857ac8 --- /dev/null +++ b/src/core/fft/CMakeLists.txt @@ -0,0 +1,24 @@ +# +# Copyright (C) 2018-2024 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +if(FFTW3_FOUND) + target_link_libraries(espresso_core PUBLIC FFTW3::FFTW3) +endif() + +target_sources(espresso_core PRIVATE fft.cpp) diff --git a/src/core/p3m/fft.cpp b/src/core/fft/fft.cpp similarity index 75% rename from src/core/p3m/fft.cpp rename to src/core/fft/fft.cpp index bd6ad27e38c..29997d3c897 100644 --- a/src/core/p3m/fft.cpp +++ b/src/core/fft/fft.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -25,19 +25,19 @@ * */ -#include "config/config.hpp" - -#if defined(P3M) || defined(DP3M) - -#include "p3m/fft.hpp" +#include "fft.hpp" +#include "vector.hpp" #include #include #include +#include + #include #include +#include #include #include #include @@ -59,6 +59,8 @@ using Utils::permute_ifield; #define REQ_FFT_BACK 302 /**@}*/ +namespace fft { + /** This ugly function does the bookkeeping: which nodes have to * communicate to each other, when you change the node grid. * Changing the regular decomposition requires communication. This @@ -183,7 +185,7 @@ namespace { * \param[in] n_pos Position of the node in @p n_grid. * \param[in] n_grid node grid. * \param[in] mesh global mesh dimensions. - * \param[in] mesh_off global mesh offset (see \ref p3m_data_struct). + * \param[in] mesh_off global mesh offset. * \param[out] loc_mesh local mesh dimension. * \param[out] start first point of local mesh in global mesh. * \return Number of mesh points in local mesh. @@ -228,7 +230,7 @@ int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh, * \param[in] pos2 Position of recv node in @p grid2. * \param[in] grid2 node grid 2. * \param[in] mesh global mesh dimensions. - * \param[in] mesh_off global mesh offset (see \ref p3m_data_struct). + * \param[in] mesh_off global mesh offset. * \param[out] block send block specification. * \return Size of the send block. */ @@ -349,69 +351,69 @@ void pack_block_permute2(double const *const in, double *const out, } } +} // namespace + /** Communicate the grid data according to the given forward FFT plan. + * \param comm MPI communicator. * \param plan FFT communication plan. * \param in input mesh. * \param out output mesh. - * \param fft FFT communication plan. - * \param comm MPI communicator. */ -void forw_grid_comm(fft_forw_plan plan, const double *in, double *out, - fft_data_struct &fft, - const boost::mpi::communicator &comm) { +void fft_data_struct::forw_grid_comm(boost::mpi::communicator const &comm, + fft_forw_plan const &plan, + double const *in, double *out) { for (std::size_t i = 0ul; i < plan.group.size(); i++) { - plan.pack_function(in, fft.send_buf.data(), &(plan.send_block[6ul * i]), + plan.pack_function(in, send_buf.data(), &(plan.send_block[6ul * i]), &(plan.send_block[6ul * i + 3ul]), plan.old_mesh, plan.element); if (plan.group[i] != comm.rank()) { - MPI_Sendrecv(fft.send_buf.data(), plan.send_size[i], MPI_DOUBLE, - plan.group[i], REQ_FFT_FORW, fft.recv_buf.data(), + MPI_Sendrecv(send_buf.data(), plan.send_size[i], MPI_DOUBLE, + plan.group[i], REQ_FFT_FORW, recv_buf.data(), plan.recv_size[i], MPI_DOUBLE, plan.group[i], REQ_FFT_FORW, comm, MPI_STATUS_IGNORE); } else { /* Self communication... */ - std::swap(fft.send_buf, fft.recv_buf); + std::swap(send_buf, recv_buf); } - fft_unpack_block(fft.recv_buf.data(), out, &(plan.recv_block[6ul * i]), + fft_unpack_block(recv_buf.data(), out, &(plan.recv_block[6ul * i]), &(plan.recv_block[6ul * i + 3ul]), plan.new_mesh, plan.element); } } /** Communicate the grid data according to the given backward FFT plan. + * \param comm MPI communicator. * \param plan_f Forward FFT plan. * \param plan_b Backward FFT plan. * \param in input mesh. * \param out output mesh. - * \param fft FFT communication plan. - * \param comm MPI communicator. */ -void back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, - const double *in, double *out, fft_data_struct &fft, - const boost::mpi::communicator &comm) { +void fft_data_struct::back_grid_comm(boost::mpi::communicator const &comm, + fft_forw_plan const &plan_f, + fft_back_plan const &plan_b, + double const *in, double *out) { /* Back means: Use the send/receive stuff from the forward plan but replace the receive blocks by the send blocks and vice versa. Attention then also new_mesh and old_mesh are exchanged */ for (std::size_t i = 0ul; i < plan_f.group.size(); i++) { - plan_b.pack_function(in, fft.send_buf.data(), &(plan_f.recv_block[6ul * i]), + plan_b.pack_function(in, send_buf.data(), &(plan_f.recv_block[6ul * i]), &(plan_f.recv_block[6ul * i + 3ul]), plan_f.new_mesh, plan_f.element); if (plan_f.group[i] != comm.rank()) { /* send first, receive second */ - MPI_Sendrecv(fft.send_buf.data(), plan_f.recv_size[i], MPI_DOUBLE, - plan_f.group[i], REQ_FFT_BACK, fft.recv_buf.data(), + MPI_Sendrecv(send_buf.data(), plan_f.recv_size[i], MPI_DOUBLE, + plan_f.group[i], REQ_FFT_BACK, recv_buf.data(), plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i], REQ_FFT_BACK, comm, MPI_STATUS_IGNORE); } else { /* Self communication... */ - std::swap(fft.send_buf, fft.recv_buf); + std::swap(send_buf, recv_buf); } - fft_unpack_block(fft.recv_buf.data(), out, &(plan_f.send_block[6ul * i]), + fft_unpack_block(recv_buf.data(), out, &(plan_f.send_block[6ul * i]), &(plan_f.send_block[6ul * i + 3ul]), plan_f.old_mesh, plan_f.element); } } -} // namespace /** Calculate 'best' mapping between a 2D and 3D grid. * Required for the communication from 3D regular domain @@ -478,11 +480,12 @@ static void calc_2d_grid(int n, int grid[3]) { } } -int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, - Utils::Vector3i const &global_mesh_dim, - Utils::Vector3d const &global_mesh_off, int &ks_pnum, - fft_data_struct &fft, Utils::Vector3i const &grid, - boost::mpi::communicator const &comm) { +int fft_data_struct::initialize_fft(boost::mpi::communicator const &comm, + Utils::Vector3i const &ca_mesh_dim, + int const *ca_mesh_margin, + Utils::Vector3i const &global_mesh_dim, + Utils::Vector3d const &global_mesh_off, + int &ks_pnum, Utils::Vector3i const &grid) { int n_grid[4][3]; /* The four node grids. */ int my_pos[4][3]; /* The position of comm.rank() in the node grids. */ @@ -493,8 +496,8 @@ int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, int node_pos[3]; MPI_Cart_coords(comm, rank, 3, node_pos); - fft.max_comm_size = 0; - fft.max_mesh_size = 0; + max_comm_size = 0; + max_mesh_size = 0; for (int i = 0; i < 4; i++) { n_id[i].resize(1 * comm.size()); n_pos[i].resize(3 * comm.size()); @@ -517,21 +520,21 @@ int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, /* FFT node grids (n_grid[1 - 3]) */ calc_2d_grid(comm.size(), n_grid[1]); /* resort n_grid[1] dimensions if necessary */ - fft.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1]); - fft.plan[0].n_permute = 0; + forw[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1]); + forw[0].n_permute = 0; for (int i = 1; i < 4; i++) - fft.plan[i].n_permute = (fft.plan[1].row_dir + i) % 3; + forw[i].n_permute = (forw[1].row_dir + i) % 3; for (int i = 0; i < 3; i++) { n_grid[2][i] = n_grid[1][(i + 1) % 3]; n_grid[3][i] = n_grid[1][(i + 2) % 3]; } - fft.plan[2].row_dir = (fft.plan[1].row_dir - 1) % 3; - fft.plan[3].row_dir = (fft.plan[1].row_dir - 2) % 3; + forw[2].row_dir = (forw[1].row_dir - 1) % 3; + forw[3].row_dir = (forw[1].row_dir - 2) % 3; /* === communication groups === */ /* copy local mesh off real space charge assignment grid */ for (int i = 0; i < 3; i++) - fft.plan[0].new_mesh[i] = ca_mesh_dim[i]; + forw[0].new_mesh[i] = ca_mesh_dim[i]; for (int i = 1; i < 4; i++) { auto group = find_comm_groups( @@ -540,8 +543,8 @@ int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank); if (not group) { /* try permutation */ - std::swap(n_grid[i][(fft.plan[i].row_dir + 1) % 3], - n_grid[i][(fft.plan[i].row_dir + 2) % 3]); + std::swap(n_grid[i][(forw[i].row_dir + 1) % 3], + n_grid[i][(forw[i].row_dir + 2) % 3]); group = find_comm_groups( {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]}, @@ -553,188 +556,185 @@ int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, } } - fft.plan[i].group = group.value(); + forw[i].group = group.value(); - fft.plan[i].send_block.resize(6 * fft.plan[i].group.size()); - fft.plan[i].send_size.resize(fft.plan[i].group.size()); - fft.plan[i].recv_block.resize(6 * fft.plan[i].group.size()); - fft.plan[i].recv_size.resize(fft.plan[i].group.size()); + forw[i].send_block.resize(6 * forw[i].group.size()); + forw[i].send_size.resize(forw[i].group.size()); + forw[i].recv_block.resize(6 * forw[i].group.size()); + forw[i].recv_size.resize(forw[i].group.size()); - fft.plan[i].new_size = calc_local_mesh( + forw[i].new_size = calc_local_mesh( my_pos[i], n_grid[i], global_mesh_dim.data(), global_mesh_off.data(), - fft.plan[i].new_mesh, fft.plan[i].start); - permute_ifield(fft.plan[i].new_mesh, 3, -(fft.plan[i].n_permute)); - permute_ifield(fft.plan[i].start, 3, -(fft.plan[i].n_permute)); - fft.plan[i].n_ffts = fft.plan[i].new_mesh[0] * fft.plan[i].new_mesh[1]; + forw[i].new_mesh, forw[i].start); + permute_ifield(forw[i].new_mesh, 3, -(forw[i].n_permute)); + permute_ifield(forw[i].start, 3, -(forw[i].n_permute)); + forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1]; /* === send/recv block specifications === */ - for (std::size_t j = 0ul; j < fft.plan[i].group.size(); j++) { + for (std::size_t j = 0ul; j < forw[i].group.size(); j++) { /* send block: comm.rank() to comm-group-node i (identity: node) */ - int node = fft.plan[i].group[j]; - fft.plan[i].send_size[j] = calc_send_block( + int node = forw[i].group[j]; + forw[i].send_size[j] = calc_send_block( my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i], global_mesh_dim.data(), global_mesh_off.data(), - &(fft.plan[i].send_block[6ul * j])); - permute_ifield(&(fft.plan[i].send_block[6ul * j]), 3, - -(fft.plan[i - 1].n_permute)); - permute_ifield(&(fft.plan[i].send_block[6ul * j + 3ul]), 3, - -(fft.plan[i - 1].n_permute)); - if (fft.plan[i].send_size[j] > fft.max_comm_size) - fft.max_comm_size = fft.plan[i].send_size[j]; + &(forw[i].send_block[6ul * j])); + permute_ifield(&(forw[i].send_block[6ul * j]), 3, + -(forw[i - 1].n_permute)); + permute_ifield(&(forw[i].send_block[6ul * j + 3ul]), 3, + -(forw[i - 1].n_permute)); + if (forw[i].send_size[j] > max_comm_size) + max_comm_size = forw[i].send_size[j]; /* First plan send blocks have to be adjusted, since the CA grid may have an additional margin outside the actual domain of the node */ if (i == 1) { for (std::size_t k = 0ul; k < 3ul; k++) - fft.plan[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k]; + forw[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k]; } /* recv block: comm.rank() from comm-group-node i (identity: node) */ - fft.plan[i].recv_size[j] = calc_send_block( + forw[i].recv_size[j] = calc_send_block( my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1], global_mesh_dim.data(), global_mesh_off.data(), - &(fft.plan[i].recv_block[6ul * j])); - permute_ifield(&(fft.plan[i].recv_block[6ul * j]), 3, - -(fft.plan[i].n_permute)); - permute_ifield(&(fft.plan[i].recv_block[6ul * j + 3ul]), 3, - -(fft.plan[i].n_permute)); - if (fft.plan[i].recv_size[j] > fft.max_comm_size) - fft.max_comm_size = fft.plan[i].recv_size[j]; + &(forw[i].recv_block[6ul * j])); + permute_ifield(&(forw[i].recv_block[6ul * j]), 3, -(forw[i].n_permute)); + permute_ifield(&(forw[i].recv_block[6ul * j + 3ul]), 3, + -(forw[i].n_permute)); + if (forw[i].recv_size[j] > max_comm_size) + max_comm_size = forw[i].recv_size[j]; } for (std::size_t j = 0ul; j < 3ul; j++) - fft.plan[i].old_mesh[j] = fft.plan[i - 1].new_mesh[j]; + forw[i].old_mesh[j] = forw[i - 1].new_mesh[j]; if (i == 1) { - fft.plan[i].element = 1; + forw[i].element = 1; } else { - fft.plan[i].element = 2; - for (std::size_t j = 0ul; j < fft.plan[i].group.size(); j++) { - fft.plan[i].send_size[j] *= 2; - fft.plan[i].recv_size[j] *= 2; + forw[i].element = 2; + for (std::size_t j = 0ul; j < forw[i].group.size(); j++) { + forw[i].send_size[j] *= 2; + forw[i].recv_size[j] *= 2; } } } /* Factor 2 for complex fields */ - fft.max_comm_size *= 2; - fft.max_mesh_size = Utils::product(ca_mesh_dim); + max_comm_size *= 2; + max_mesh_size = Utils::product(ca_mesh_dim); for (int i = 1; i < 4; i++) - if (2 * fft.plan[i].new_size > fft.max_mesh_size) - fft.max_mesh_size = 2 * fft.plan[i].new_size; + if (2 * forw[i].new_size > max_mesh_size) + max_mesh_size = 2 * forw[i].new_size; /* === pack function === */ for (int i = 1; i < 4; i++) { - fft.plan[i].pack_function = pack_block_permute2; + forw[i].pack_function = pack_block_permute2; } ks_pnum = 6; - if (fft.plan[1].row_dir == 2) { - fft.plan[1].pack_function = fft_pack_block; + if (forw[1].row_dir == 2) { + forw[1].pack_function = fft_pack_block; ks_pnum = 4; - } else if (fft.plan[1].row_dir == 1) { - fft.plan[1].pack_function = pack_block_permute1; + } else if (forw[1].row_dir == 1) { + forw[1].pack_function = pack_block_permute1; ks_pnum = 5; } - fft.send_buf.resize(fft.max_comm_size); - fft.recv_buf.resize(fft.max_comm_size); - fft.data_buf.resize(fft.max_mesh_size); - auto *c_data = (fftw_complex *)(fft.data_buf.data()); + send_buf.resize(max_comm_size); + recv_buf.resize(max_comm_size); + data_buf.resize(max_mesh_size); + auto *c_data = (fftw_complex *)(data_buf.data()); /* === FFT Routines (Using FFTW / RFFTW package)=== */ for (int i = 1; i < 4; i++) { - fft.plan[i].dir = FFTW_FORWARD; - /* FFT plan creation.*/ - - if (fft.init_tag) - fftw_destroy_plan(fft.plan[i].our_fftw_plan); - fft.plan[i].our_fftw_plan = fftw_plan_many_dft( - 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, - fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], - fft.plan[i].dir, FFTW_PATIENT); + if (init_tag) { + forw[i].destroy_plan(); + } + forw[i].dir = FFTW_FORWARD; + forw[i].plan_handle = + fftw_plan_many_dft(1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data, + nullptr, 1, forw[i].new_mesh[2], c_data, nullptr, 1, + forw[i].new_mesh[2], forw[i].dir, FFTW_PATIENT); + assert(forw[i].plan_handle); } /* === The BACK Direction === */ /* this is needed because slightly different functions are used */ for (int i = 1; i < 4; i++) { - fft.back[i].dir = FFTW_BACKWARD; - - if (fft.init_tag) - fftw_destroy_plan(fft.back[i].our_fftw_plan); - fft.back[i].our_fftw_plan = fftw_plan_many_dft( - 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1, - fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2], - fft.back[i].dir, FFTW_PATIENT); - - fft.back[i].pack_function = pack_block_permute1; + if (init_tag) { + back[i].destroy_plan(); + } + back[i].dir = FFTW_BACKWARD; + back[i].plan_handle = + fftw_plan_many_dft(1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data, + nullptr, 1, forw[i].new_mesh[2], c_data, nullptr, 1, + forw[i].new_mesh[2], back[i].dir, FFTW_PATIENT); + back[i].pack_function = pack_block_permute1; + assert(back[i].plan_handle); } - if (fft.plan[1].row_dir == 2) { - fft.back[1].pack_function = fft_pack_block; - } else if (fft.plan[1].row_dir == 1) { - fft.back[1].pack_function = pack_block_permute2; + if (forw[1].row_dir == 2) { + back[1].pack_function = fft_pack_block; + } else if (forw[1].row_dir == 1) { + back[1].pack_function = pack_block_permute2; } - fft.init_tag = true; + init_tag = true; - return fft.max_mesh_size; + return max_mesh_size; } -void fft_perform_forw(double *data, fft_data_struct &fft, - const boost::mpi::communicator &comm) { +void fft_data_struct::forward_fft(boost::mpi::communicator const &comm, + double *data) { /* ===== first direction ===== */ auto *c_data = (fftw_complex *)data; - auto *c_data_buf = (fftw_complex *)fft.data_buf.data(); + auto *c_data_buf = (fftw_complex *)data_buf.data(); /* communication to current dir row format (in is data) */ - forw_grid_comm(fft.plan[1], data, fft.data_buf.data(), fft, comm); + forw_grid_comm(comm, forw[1], data, data_buf.data()); - /* complexify the real data array (in is fft.data_buf) */ - for (int i = 0; i < fft.plan[1].new_size; i++) { - data[2 * i + 0] = fft.data_buf[i]; /* real value */ - data[2 * i + 1] = 0; /* complex value */ + /* complexify the real data array (in is data_buf) */ + for (int i = 0; i < forw[1].new_size; i++) { + data[2 * i + 0] = data_buf[i]; /* real value */ + data[2 * i + 1] = 0; /* complex value */ } /* perform FFT (in/out is data)*/ - fftw_execute_dft(fft.plan[1].our_fftw_plan, c_data, c_data); + fftw_execute_dft(forw[1].plan_handle, c_data, c_data); /* ===== second direction ===== */ /* communication to current dir row format (in is data) */ - forw_grid_comm(fft.plan[2], data, fft.data_buf.data(), fft, comm); - /* perform FFT (in/out is fft.data_buf) */ - fftw_execute_dft(fft.plan[2].our_fftw_plan, c_data_buf, c_data_buf); + forw_grid_comm(comm, forw[2], data, data_buf.data()); + /* perform FFT (in/out is data_buf) */ + fftw_execute_dft(forw[2].plan_handle, c_data_buf, c_data_buf); /* ===== third direction ===== */ - /* communication to current dir row format (in is fft.data_buf) */ - forw_grid_comm(fft.plan[3], fft.data_buf.data(), data, fft, comm); + /* communication to current dir row format (in is data_buf) */ + forw_grid_comm(comm, forw[3], data_buf.data(), data); /* perform FFT (in/out is data)*/ - fftw_execute_dft(fft.plan[3].our_fftw_plan, c_data, c_data); + fftw_execute_dft(forw[3].plan_handle, c_data, c_data); /* REMARK: Result has to be in data. */ } -void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, - const boost::mpi::communicator &comm) { +void fft_data_struct::backward_fft(boost::mpi::communicator const &comm, + double *data, bool check_complex) { auto *c_data = (fftw_complex *)data; - auto *c_data_buf = (fftw_complex *)fft.data_buf.data(); + auto *c_data_buf = (fftw_complex *)data_buf.data(); /* ===== third direction ===== */ /* perform FFT (in is data) */ - fftw_execute_dft(fft.back[3].our_fftw_plan, c_data, c_data); + fftw_execute_dft(back[3].plan_handle, c_data, c_data); /* communicate (in is data)*/ - back_grid_comm(fft.plan[3], fft.back[3], data, fft.data_buf.data(), fft, - comm); + back_grid_comm(comm, forw[3], back[3], data, data_buf.data()); /* ===== second direction ===== */ - /* perform FFT (in is fft.data_buf) */ - fftw_execute_dft(fft.back[2].our_fftw_plan, c_data_buf, c_data_buf); - /* communicate (in is fft.data_buf) */ - back_grid_comm(fft.plan[2], fft.back[2], fft.data_buf.data(), data, fft, - comm); + /* perform FFT (in is data_buf) */ + fftw_execute_dft(back[2].plan_handle, c_data_buf, c_data_buf); + /* communicate (in is data_buf) */ + back_grid_comm(comm, forw[2], back[2], data_buf.data(), data); /* ===== first direction ===== */ /* perform FFT (in is data) */ - fftw_execute_dft(fft.back[1].our_fftw_plan, c_data, c_data); + fftw_execute_dft(back[1].plan_handle, c_data, c_data); /* throw away the (hopefully) empty complex component (in is data) */ - for (int i = 0; i < fft.plan[1].new_size; i++) { - fft.data_buf[i] = data[2 * i]; /* real value */ + for (int i = 0; i < forw[1].new_size; i++) { + data_buf[i] = data[2 * i]; /* real value */ // Vincent: if (check_complex and std::abs(data[2 * i + 1]) > 1e-5) { printf("Complex value is not zero (i=%d,data=%g)!!!\n", i, @@ -743,9 +743,8 @@ void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, throw std::runtime_error("Complex value is not zero"); } } - /* communicate (in is fft.data_buf) */ - back_grid_comm(fft.plan[1], fft.back[1], fft.data_buf.data(), data, fft, - comm); + /* communicate (in is data_buf) */ + back_grid_comm(comm, forw[1], back[1], data_buf.data(), data); /* REMARK: Result has to be in data. */ } @@ -797,4 +796,16 @@ void fft_unpack_block(double const *const in, double *const out, li_out += s_out_offset; } } -#endif + +void fft_plan::destroy_plan() { + if (plan_handle) { + fftw_destroy_plan(plan_handle); + plan_handle = nullptr; + } +} + +namespace detail { +void fft_free(void *const p) { ::fftw_free(p); } +void *fft_malloc(std::size_t length) { return ::fftw_malloc(length); } +} // namespace detail +} // namespace fft diff --git a/src/core/p3m/fft.hpp b/src/core/fft/fft.hpp similarity index 54% rename from src/core/p3m/fft.hpp rename to src/core/fft/fft.hpp index 03dd4a0de6a..8747448a371 100644 --- a/src/core/p3m/fft.hpp +++ b/src/core/fft/fft.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -26,71 +26,49 @@ * Routines, row decomposition, data structures and communication for the * 3D-FFT. * - * The 3D-FFT is split into 3 ond dimensional FFTs. The data is + * The 3D-FFT is split into three 1D-FFTs. The data is * distributed in such a way, that for the actual direction of the * FFT each node has a certain number of rows for which it performs a * 1D-FFT. After performing the FFT on that direction the data is * redistributed. * - * For simplicity at the moment I have implemented a full complex to - * complex FFT (even though a real to complex FFT would be sufficient). - * - * For more information about FFT usage, see \ref fft.cpp "fft.cpp". + * For simplicity, a full complex-to-complex FFT is implemented, + * even though a real-to-complex FFT would be sufficient. */ -#include "config/config.hpp" - -#if defined(P3M) || defined(DP3M) +#include "vector.hpp" #include -#include - -#include - +#include #include -#include -#include +#include +#include +#include +#include #include -/** Aligned allocator for fft data. */ -template struct fft_allocator { - typedef T value_type; - fft_allocator() noexcept = default; // default ctor not required - template explicit fft_allocator(const fft_allocator &) {} - template bool operator==(const fft_allocator &) const { - return true; - } - template bool operator!=(const fft_allocator &) const { - return false; - } - - T *allocate(const std::size_t n) const { - if (n == 0) { - return nullptr; - } - if (n > std::numeric_limits::max() / sizeof(T)) { - throw std::bad_array_new_length(); - } - void *const pv = fftw_malloc(n * sizeof(T)); - if (!pv) { - throw std::bad_alloc(); - } - return static_cast(pv); - } - void deallocate(T *const p, std::size_t) const noexcept { fftw_free(p); } -}; +struct fftw_plan_s; +namespace boost::mpi { +class environment; +class communicator; +} // namespace boost::mpi -template using fft_vector = std::vector>; +namespace fft { struct fft_plan { + using fftw_plan = fftw_plan_s *; + + ~fft_plan() { destroy_plan(); } + /** plan direction: forward or backward FFT (enum value from FFTW). */ int dir; /** plan for the FFT. */ - fftw_plan our_fftw_plan; + fftw_plan plan_handle = nullptr; /** packing function for send blocks. */ void (*pack_function)(double const *const, double *const, int const *, int const *, int const *, int); + void destroy_plan(); }; /** @brief Plan for a forward 1D FFT of a flattened 3D array. */ @@ -129,18 +107,27 @@ struct fft_forw_plan : public fft_plan { /** @brief Plan for a backward 1D FFT of a flattened 3D array. */ struct fft_back_plan : public fft_plan {}; -/** Information about the three one dimensional FFTs and how the nodes - * have to communicate inbetween. +/** + * @brief Information about the three one dimensional FFTs and how the nodes + * have to communicate inbetween. * - * @note FFT numbering starts with 1 for technical reasons (because we have 4 - * node grids, the index 0 is used for the real space charge assignment - * grid). + * @note FFT numbering starts with 1 for technical reasons (because we have 4 + * node grids, the index 0 is used for the real space charge assignment grid). */ -struct fft_data_struct { // NOLINT(bugprone-reserved-identifier) +struct fft_data_struct { +private: + /** + * @brief Handle to the MPI environment. + * Has to be the first member in the class definition, so that FFT plans + * are destroyed before the MPI environment expires (non-static class + * members are destroyed in the reverse order of their initialization). + */ + std::shared_ptr m_mpi_env; + /** Information for forward FFTs. */ - fft_forw_plan plan[4]; + std::array forw; /** Information for backward FFTs. */ - fft_back_plan back[4]; + std::array back; /** Whether FFT is initialized or not. */ bool init_tag = false; @@ -156,47 +143,64 @@ struct fft_data_struct { // NOLINT(bugprone-reserved-identifier) /** receive buffer. */ std::vector recv_buf; /** Buffer for receive data. */ - fft_vector data_buf; + fft::vector data_buf; + +public: + explicit fft_data_struct(decltype(m_mpi_env) mpi_env) + : m_mpi_env{std::move(mpi_env)} {} + + // disable copy construction: unsafe because we store raw pointers + // to FFT plans (avoids double-free and use-after-free) + fft_data_struct &operator=(fft_data_struct const &) = delete; + fft_data_struct(fft_data_struct const &) = delete; + + /** Initialize everything connected to the 3D-FFT. + * + * \param[in] comm MPI communicator. + * \param[in] ca_mesh_dim Local CA mesh dimensions. + * \param[in] ca_mesh_margin Local CA mesh margins. + * \param[in] global_mesh_dim Global CA mesh dimensions. + * \param[in] global_mesh_off Global CA mesh offset. + * \param[out] ks_pnum Number of permutations in k-space. + * \param[in] grid Number of nodes in each spatial dimension. + * \return Maximal size of local fft mesh (needed for allocation of ca_mesh). + */ + int initialize_fft(boost::mpi::communicator const &comm, + Utils::Vector3i const &ca_mesh_dim, + int const *ca_mesh_margin, + Utils::Vector3i const &global_mesh_dim, + Utils::Vector3d const &global_mesh_off, int &ks_pnum, + Utils::Vector3i const &grid); + + /** Perform an in-place forward 3D FFT. + * \warning The content of \a data is overwritten. + * \param[in,out] data Mesh. + * \param[in] comm MPI communicator + */ + void forward_fft(boost::mpi::communicator const &comm, double *data); + + /** Perform an in-place backward 3D FFT. + * \warning The content of \a data is overwritten. + * \param[in,out] data Mesh. + * \param[in] check_complex Throw an error if the complex component is + * non-zero. + * \param[in] comm MPI communicator. + */ + void backward_fft(boost::mpi::communicator const &comm, double *data, + bool check_complex); + + auto get_mesh_size() const { return forw[3u].new_mesh; } + + auto get_mesh_start() const { return forw[3u].start; } + +private: + void forw_grid_comm(boost::mpi::communicator const &comm, + fft_forw_plan const &plan, double const *in, double *out); + void back_grid_comm(boost::mpi::communicator const &comm, + fft_forw_plan const &plan_f, fft_back_plan const &plan_b, + double const *in, double *out); }; -/** Initialize everything connected to the 3D-FFT. - * - * \param[in] ca_mesh_dim Local CA mesh dimensions. - * \param[in] ca_mesh_margin Local CA mesh margins. - * \param[in] global_mesh_dim Global CA mesh dimensions. - * \param[in] global_mesh_off Global CA mesh offset. - * \param[out] ks_pnum Number of permutations in k-space. - * \param[out] fft FFT plan. - * \param[in] grid Number of nodes in each spatial dimension. - * \param[in] comm MPI communicator. - * \return Maximal size of local fft mesh (needed for allocation of ca_mesh). - */ -int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, - Utils::Vector3i const &global_mesh_dim, - Utils::Vector3d const &global_mesh_off, int &ks_pnum, - fft_data_struct &fft, Utils::Vector3i const &grid, - boost::mpi::communicator const &comm); - -/** Perform an in-place forward 3D FFT. - * \warning The content of \a data is overwritten. - * \param[in,out] data Mesh. - * \param[in,out] fft FFT plan. - * \param[in] comm MPI communicator - */ -void fft_perform_forw(double *data, fft_data_struct &fft, - const boost::mpi::communicator &comm); - -/** Perform an in-place backward 3D FFT. - * \warning The content of \a data is overwritten. - * \param[in,out] data Mesh. - * \param[in] check_complex Throw an error if the complex component is - * non-zero. - * \param[in,out] fft FFT plan. - * \param[in] comm MPI communicator. - */ -void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, - const boost::mpi::communicator &comm); - /** Pack a block (size[3] starting at start[3]) of an input * 3d-grid with dimension dim[3] into an output 3d-block with * dimension size[3]. @@ -234,4 +238,10 @@ void fft_unpack_block(double const *in, double *out, int const start[3], int map_3don2d_grid(int const g3d[3], int g2d[3]); -#endif // defined(P3M) || defined(DP3M) +std::optional> find_comm_groups(Utils::Vector3i const &, + Utils::Vector3i const &, + std::span, + std::span, std::span, + std::span, int); + +} // namespace fft diff --git a/src/core/fft/vector.hpp b/src/core/fft/vector.hpp new file mode 100644 index 00000000000..e74df7a3de0 --- /dev/null +++ b/src/core/fft/vector.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include +#include +#include +#include + +namespace fft { +namespace detail { +void fft_free(void *p); +void *fft_malloc(std::size_t length); +} // namespace detail + +/** @brief Aligned allocator for FFT data. */ +template struct allocator { + typedef T value_type; + allocator() noexcept = default; // default ctor not required + template explicit allocator(const allocator &) {} + template bool operator==(const allocator &) const { + return true; + } + template bool operator!=(const allocator &) const { + return false; + } + + T *allocate(const std::size_t n) const { + if (n == 0) { + return nullptr; + } + if (n > std::numeric_limits::max() / sizeof(T)) { + throw std::bad_array_new_length(); + } + void *const pv = detail::fft_malloc(n * sizeof(T)); + if (!pv) { + throw std::bad_alloc(); + } + return static_cast(pv); + } + + void deallocate(T *const p, std::size_t) const noexcept { + detail::fft_free(static_cast(p)); + } +}; + +template using vector = std::vector>; + +} // namespace fft diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp index 4214900fb52..b3ab8871fab 100644 --- a/src/core/magnetostatics/dp3m.cpp +++ b/src/core/magnetostatics/dp3m.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -31,10 +31,10 @@ #include "magnetostatics/dp3m.hpp" +#include "fft/fft.hpp" #include "p3m/TuningAlgorithm.hpp" #include "p3m/TuningLogger.hpp" #include "p3m/common.hpp" -#include "p3m/fft.hpp" #include "p3m/influence_function_dipolar.hpp" #include "p3m/interpolation.hpp" #include "p3m/math.hpp" @@ -106,12 +106,9 @@ double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part, double tuned_accuracy); double DipolarP3M::calc_average_self_energy_k_space() const { - auto const start = Utils::Vector3i{dp3m.fft.plan[3].start}; - auto const size = Utils::Vector3i{dp3m.fft.plan[3].new_mesh}; - auto const &box_geo = *get_system().box_geo; auto const node_phi = grid_influence_function_self_energy( - dp3m.params, start, start + size, dp3m.g_energy); + dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy); double phi = 0.; boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0); @@ -131,21 +128,10 @@ void DipolarP3M::init() { dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao); dp3m.params.recalc_a_ai_cao_cut(box_geo.length()); - dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.); - - dp3m.sm.resize(comm_cart, dp3m.local_mesh); - - int ca_mesh_size = - fft_init(dp3m.local_mesh.dim, dp3m.local_mesh.margin, dp3m.params.mesh, - dp3m.params.mesh_off, dp3m.ks_pnum, dp3m.fft, - ::communicator.node_grid, comm_cart); - dp3m.rs_mesh.resize(ca_mesh_size); - dp3m.ks_mesh.resize(ca_mesh_size); - - for (auto &val : dp3m.rs_mesh_dip) { - val.resize(ca_mesh_size); - } + assert(dp3m.fft); + dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.); + dp3m.fft->init_fft(); dp3m.calc_differential_operator(); /* fix box length dependent constants */ @@ -174,15 +160,16 @@ DipolarP3M::DipolarP3M(P3MParameters &¶meters, double prefactor, namespace { template struct AssignDipole { - void operator()(dp3m_data_struct &dp3m, Utils::Vector3d const &real_pos, + void operator()(decltype(DipolarP3M::dp3m) &dp3m, + Utils::Vector3d const &real_pos, Utils::Vector3d const &dip) const { auto const weights = p3m_calculate_interpolation_weights( real_pos, dp3m.params.ai, dp3m.local_mesh); p3m_interpolate(dp3m.local_mesh, weights, [&dip, &dp3m](int ind, double w) { - dp3m.rs_mesh_dip[0][ind] += w * dip[0]; - dp3m.rs_mesh_dip[1][ind] += w * dip[1]; - dp3m.rs_mesh_dip[2][ind] += w * dip[2]; + dp3m.mesh.rs_fields[0][ind] += w * dip[0]; + dp3m.mesh.rs_fields[1][ind] += w * dip[1]; + dp3m.mesh.rs_fields[2][ind] += w * dip[2]; }); dp3m.inter_weights.store(weights); @@ -194,9 +181,9 @@ void DipolarP3M::dipole_assign(ParticleRange const &particles) { dp3m.inter_weights.reset(dp3m.params.cao); /* prepare local FFT mesh */ - for (auto &i : dp3m.rs_mesh_dip) + for (auto &rs_mesh_field : dp3m.mesh.rs_fields) for (int j = 0; j < dp3m.local_mesh.size; j++) - i[j] = 0.; + rs_mesh_field[j] = 0.; for (auto const &p : particles) { if (p.dipm() != 0.) { @@ -208,7 +195,7 @@ void DipolarP3M::dipole_assign(ParticleRange const &particles) { namespace { template struct AssignTorques { - void operator()(dp3m_data_struct const &dp3m, double prefac, int d_rs, + void operator()(decltype(DipolarP3M::dp3m) &dp3m, double prefac, int d_rs, ParticleRange const &particles) const { /* magnetic particle index */ @@ -221,7 +208,7 @@ template struct AssignTorques { Utils::Vector3d E{}; p3m_interpolate(dp3m.local_mesh, w, [&E, &dp3m, d_rs](int ind, double w) { - E[d_rs] += w * dp3m.rs_mesh[ind]; + E[d_rs] += w * dp3m.mesh.rs_scalar[ind]; }); p.torque() -= vector_product(p.calc_dip(), prefac * E); @@ -232,7 +219,7 @@ template struct AssignTorques { }; template struct AssignForces { - void operator()(dp3m_data_struct const &dp3m, double prefac, int d_rs, + void operator()(decltype(DipolarP3M::dp3m) &dp3m, double prefac, int d_rs, ParticleRange const &particles) const { /* magnetic particle index */ @@ -244,9 +231,9 @@ template struct AssignForces { Utils::Vector3d E{}; p3m_interpolate(dp3m.local_mesh, w, [&E, &dp3m](int ind, double w) { - E[0] += w * dp3m.rs_mesh_dip[0][ind]; - E[1] += w * dp3m.rs_mesh_dip[1][ind]; - E[2] += w * dp3m.rs_mesh_dip[2][ind]; + E[0] += w * dp3m.mesh.rs_fields[0][ind]; + E[1] += w * dp3m.mesh.rs_fields[1][ind]; + E[2] += w * dp3m.mesh.rs_fields[2][ind]; }); p.force()[d_rs] += p.calc_dip() * prefac * E; @@ -273,17 +260,7 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, if (dp3m.sum_mu2 > 0.) { dipole_assign(particles); - /* Gather information for FFT grid inside the nodes domain (inner local - * mesh) and perform forward 3D FFT (Charge Assignment Mesh). */ - std::array meshes = {{dp3m.rs_mesh_dip[0u].data(), - dp3m.rs_mesh_dip[1u].data(), - dp3m.rs_mesh_dip[2u].data()}}; - dp3m.sm.gather_grid(meshes, comm_cart, dp3m.local_mesh.dim); - for (auto i = 0u; i < 3u; ++i) { - fft_perform_forw(dp3m.rs_mesh_dip[i].data(), dp3m.fft, comm_cart); - } - // Note: after these calls, the grids are in the order yzx and not xyz - // anymore!!! + dp3m.fft->perform_fwd_fft(); } /* === k-space energy calculation === */ @@ -295,26 +272,26 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, /* i*k differentiation for dipolar gradients: * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */ - auto constexpr m_start = Utils::Vector3i::broadcast(0); - auto const m_stop = std::span(dp3m.fft.plan[3].new_mesh); - auto const offset = std::span(dp3m.fft.plan[3].start); + auto constexpr mesh_start = Utils::Vector3i::broadcast(0); + auto const &offset = dp3m.mesh.start; auto const &d_op = dp3m.d_op[0u]; - auto const &mesh_dip = dp3m.rs_mesh_dip; + auto const &mesh_dip = dp3m.mesh.rs_fields; + auto const [KX, KY, KZ] = dp3m.fft->get_permutations(); auto indices = Utils::Vector3i{}; auto index = std::size_t(0u); auto it_energy = dp3m.g_energy.begin(); auto node_energy = 0.; - for_each_3d(m_start, m_stop, indices, [&]() { - using namespace detail::FFT_indexing; + for_each_3d(mesh_start, dp3m.mesh.size, indices, [&]() { + auto const shift = indices + offset; // Re(mu)*k - auto const re = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + - mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + - mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + auto const re = mesh_dip[0u][index] * d_op[shift[KX]] + + mesh_dip[1u][index] * d_op[shift[KY]] + + mesh_dip[2u][index] * d_op[shift[KZ]]; ++index; // Im(mu)*k - auto const im = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + - mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + - mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + auto const im = mesh_dip[0u][index] * d_op[shift[KX]] + + mesh_dip[1u][index] * d_op[shift[KY]] + + mesh_dip[2u][index] * d_op[shift[KZ]]; ++index; node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im)); std::advance(it_energy, 1); @@ -344,30 +321,31 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, ****************************/ if (dp3m.sum_mu2 > 0.) { auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u]; - auto constexpr m_start = Utils::Vector3i::broadcast(0); - auto const m_stop = std::span(dp3m.fft.plan[3].new_mesh); - auto const offset = std::span(dp3m.fft.plan[3].start); + auto constexpr mesh_start = Utils::Vector3i::broadcast(0); + auto const &mesh_stop = dp3m.mesh.size; + auto const offset = dp3m.mesh.start; auto const &d_op = dp3m.d_op[0u]; - auto const &mesh_dip = dp3m.rs_mesh_dip; + auto const [KX, KY, KZ] = dp3m.fft->get_permutations(); + auto &mesh_dip = dp3m.mesh.rs_fields; auto indices = Utils::Vector3i{}; auto index = std::size_t(0u); - /* fill in ks_mesh array for torque calculation */ + /* fill in mesh.ks_scalar array for torque calculation */ auto it_energy = dp3m.g_energy.begin(); index = 0u; - for_each_3d(m_start, m_stop, indices, [&]() { - using namespace detail::FFT_indexing; + for_each_3d(mesh_start, mesh_stop, indices, [&]() { + auto const shift = indices + offset; // Re(mu)*k - auto const re = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + - mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + - mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; - dp3m.ks_mesh[index] = *it_energy * re; + auto const re = mesh_dip[0u][index] * d_op[shift[KX]] + + mesh_dip[1u][index] * d_op[shift[KY]] + + mesh_dip[2u][index] * d_op[shift[KZ]]; + dp3m.mesh.ks_scalar[index] = *it_energy * re; ++index; // Im(mu)*k - auto const im = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + - mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + - mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; - dp3m.ks_mesh[index] = *it_energy * im; + auto const im = mesh_dip[0u][index] * d_op[shift[KX]] + + mesh_dip[1u][index] * d_op[shift[KY]] + + mesh_dip[2u][index] * d_op[shift[KZ]]; + dp3m.mesh.ks_scalar[index] = *it_energy * im; ++index; std::advance(it_energy, 1); }); @@ -375,21 +353,16 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, /* Force component loop */ for (int d = 0; d < 3; d++) { index = 0u; - for_each_3d(m_start, m_stop, indices, [&]() { + for_each_3d(mesh_start, mesh_stop, indices, [&]() { auto const pos = indices[d] + offset[d]; - dp3m.rs_mesh[index] = d_op[pos] * dp3m.ks_mesh[index]; + dp3m.mesh.rs_scalar[index] = d_op[pos] * dp3m.mesh.ks_scalar[index]; ++index; - dp3m.rs_mesh[index] = d_op[pos] * dp3m.ks_mesh[index]; + dp3m.mesh.rs_scalar[index] = d_op[pos] * dp3m.mesh.ks_scalar[index]; ++index; }); - - /* Back FFT force component mesh */ - fft_perform_back(dp3m.rs_mesh.data(), false, dp3m.fft, comm_cart); - /* redistribute force component mesh */ - dp3m.sm.spread_grid(dp3m.rs_mesh.data(), comm_cart, - dp3m.local_mesh.dim); + dp3m.fft->perform_space_back_fft(); /* Assign force component from mesh to particle */ - auto const d_rs = (d + dp3m.ks_pnum) % 3; + auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3; Utils::integral_parameter( dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs, particles); } @@ -398,57 +371,50 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, DIPOLAR FORCES (k-space) ****************************/ // Compute forces after torques because the algorithm below overwrites the - // grids dp3m.rs_mesh_dip ! + // grids dp3m.mesh.rs_fields ! // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this // number to 6 ! - /* fill in ks_mesh array for force calculation */ + /* fill in mesh.ks_scalar array for force calculation */ auto it_force = dp3m.g_force.begin(); index = 0u; - for_each_3d(m_start, m_stop, indices, [&]() { - using namespace detail::FFT_indexing; + for_each_3d(mesh_start, mesh_stop, indices, [&]() { + auto const shift = indices + offset; // Re(mu)*k - auto const re = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + - mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + - mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + auto const re = mesh_dip[0u][index] * d_op[shift[KX]] + + mesh_dip[1u][index] * d_op[shift[KY]] + + mesh_dip[2u][index] * d_op[shift[KZ]]; ++index; // Im(mu)*k - auto const im = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + - mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + - mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + auto const im = mesh_dip[0u][index] * d_op[shift[KX]] + + mesh_dip[1u][index] * d_op[shift[KY]] + + mesh_dip[2u][index] * d_op[shift[KZ]]; ++index; - dp3m.ks_mesh[index - 2] = *it_force * im; - dp3m.ks_mesh[index - 1] = *it_force * (-re); + dp3m.mesh.ks_scalar[index - 2] = *it_force * im; + dp3m.mesh.ks_scalar[index - 1] = *it_force * (-re); std::advance(it_force, 1); }); /* Force component loop */ for (int d = 0; d < 3; d++) { - auto &rs_mesh_dip = dp3m.rs_mesh_dip; index = 0u; - for_each_3d(m_start, m_stop, indices, [&]() { - using namespace detail::FFT_indexing; - auto const f1 = d_op[indices[d] + offset[d]] * dp3m.ks_mesh[index]; - rs_mesh_dip[0][index] = d_op[indices[KX] + offset[KX]] * f1; - rs_mesh_dip[1][index] = d_op[indices[KY] + offset[KY]] * f1; - rs_mesh_dip[2][index] = d_op[indices[KZ] + offset[KZ]] * f1; + for_each_3d(mesh_start, mesh_stop, indices, [&]() { + auto const shift = indices + offset; + auto const f1 = + d_op[indices[d] + offset[d]] * dp3m.mesh.ks_scalar[index]; + mesh_dip[0u][index] = d_op[shift[KX]] * f1; + mesh_dip[1u][index] = d_op[shift[KY]] * f1; + mesh_dip[2u][index] = d_op[shift[KZ]] * f1; ++index; - auto const f2 = d_op[indices[d] + offset[d]] * dp3m.ks_mesh[index]; - rs_mesh_dip[0][index] = d_op[indices[KX] + offset[KX]] * f2; - rs_mesh_dip[1][index] = d_op[indices[KY] + offset[KY]] * f2; - rs_mesh_dip[2][index] = d_op[indices[KZ] + offset[KZ]] * f2; + auto const f2 = + d_op[indices[d] + offset[d]] * dp3m.mesh.ks_scalar[index]; + mesh_dip[0u][index] = d_op[shift[KX]] * f2; + mesh_dip[1u][index] = d_op[shift[KY]] * f2; + mesh_dip[2u][index] = d_op[shift[KZ]] * f2; ++index; }); - /* Back FFT force component mesh */ - for (auto i = 0u; i < 3u; ++i) { - fft_perform_back(rs_mesh_dip[i].data(), false, dp3m.fft, comm_cart); - } - /* redistribute force component mesh */ - std::array meshes = {{rs_mesh_dip[0].data(), - rs_mesh_dip[1].data(), - rs_mesh_dip[2].data()}}; - dp3m.sm.spread_grid(meshes, comm_cart, dp3m.local_mesh.dim); + dp3m.fft->perform_field_back_fft(); /* Assign force component from mesh to particle */ - auto const d_rs = (d + dp3m.ks_pnum) % 3; + auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3; Utils::integral_parameter( dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs, particles); @@ -490,18 +456,18 @@ double DipolarP3M::calc_surface_term(bool force_flag, bool energy_flag, std::size_t ip = 0u; for (auto const &p : particles) { auto const dip = p.calc_dip(); - mx[ip] = dip[0]; - my[ip] = dip[1]; - mz[ip] = dip[2]; + mx[ip] = dip[0u]; + my[ip] = dip[1u]; + mz[ip] = dip[2u]; ip++; } // we will need the sum of all dipolar momenta vectors auto local_dip = Utils::Vector3d{}; for (std::size_t i = 0u; i < n_local_part; i++) { - local_dip[0] += mx[i]; - local_dip[1] += my[i]; - local_dip[2] += mz[i]; + local_dip[0u] += mx[i]; + local_dip[1u] += my[i]; + local_dip[2u] += mz[i]; } auto const box_dip = boost::mpi::all_reduce(comm_cart, local_dip, std::plus<>()); @@ -523,17 +489,17 @@ double DipolarP3M::calc_surface_term(bool force_flag, bool energy_flag, std::vector sumiz(n_local_part); for (std::size_t i = 0u; i < n_local_part; i++) { - sumix[i] = my[i] * box_dip[2] - mz[i] * box_dip[1]; - sumiy[i] = mz[i] * box_dip[0] - mx[i] * box_dip[2]; - sumiz[i] = mx[i] * box_dip[1] - my[i] * box_dip[0]; + sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u]; + sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u]; + sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u]; } ip = 0u; for (auto &p : particles) { auto &torque = p.torque(); - torque[0] -= pref * sumix[ip]; - torque[1] -= pref * sumiy[ip]; - torque[2] -= pref * sumiz[ip]; + torque[0u] -= pref * sumix[ip]; + torque[1u] -= pref * sumiy[ip]; + torque[2u] -= pref * sumiz[ip]; ip++; } } @@ -542,27 +508,23 @@ double DipolarP3M::calc_surface_term(bool force_flag, bool energy_flag, } void DipolarP3M::calc_influence_function_force() { - auto const start = Utils::Vector3i{dp3m.fft.plan[3].start}; - auto const size = Utils::Vector3i{dp3m.fft.plan[3].new_mesh}; - - dp3m.g_force = grid_influence_function<3>(dp3m.params, start, start + size, - get_system().box_geo->length_inv()); + dp3m.g_force = + grid_influence_function<3>(dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, + get_system().box_geo->length_inv()); } void DipolarP3M::calc_influence_function_energy() { - auto const start = Utils::Vector3i{dp3m.fft.plan[3].start}; - auto const size = Utils::Vector3i{dp3m.fft.plan[3].new_mesh}; - - dp3m.g_energy = grid_influence_function<2>( - dp3m.params, start, start + size, get_system().box_geo->length_inv()); + dp3m.g_energy = + grid_influence_function<2>(dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, + get_system().box_geo->length_inv()); } class DipolarTuningAlgorithm : public TuningAlgorithm { - dp3m_data_struct &dp3m; + decltype(DipolarP3M::dp3m) &dp3m; int m_mesh_max = -1, m_mesh_min = -1; public: - DipolarTuningAlgorithm(System::System &system, dp3m_data_struct &input_dp3m, + DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings) : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {} @@ -708,8 +670,8 @@ void DipolarP3M::tune() { static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i) { - auto constexpr m_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN); - auto constexpr m_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1); + auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN); + auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1); auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i); auto alias1 = 0.; auto alias2 = 0.; @@ -718,7 +680,7 @@ static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, Utils::Vector3i nm{}; Utils::Vector3d fnm{}; for_each_3d( - m_start, m_stop, indices, + mesh_start, mesh_stop, indices, [&]() { auto const norm_sq = nm.norm2(); auto const ex = std::exp(-factor1 * norm_sq); @@ -741,14 +703,14 @@ static double dp3m_k_space_error(double box_size, int mesh, int cao, auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao); auto const mesh_i = 1. / static_cast(mesh); auto const alpha_L_i = 1. / alpha_L; - auto const m_stop = Utils::Vector3i::broadcast(mesh / 2); - auto const m_start = -m_stop; + auto const mesh_stop = Utils::Vector3i::broadcast(mesh / 2); + auto const mesh_start = -mesh_stop; auto indices = Utils::Vector3i{}; auto values = Utils::Vector3d{}; auto he_q = 0.; for_each_3d( - m_start, m_stop, indices, + mesh_start, mesh_stop, indices, [&]() { if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) { auto const n2 = indices.norm2(); @@ -926,4 +888,5 @@ void npt_add_virial_magnetic_contribution(double energy) { npt_add_virial_contribution(energy); } #endif // NPT + #endif // DP3M diff --git a/src/core/magnetostatics/dp3m.hpp b/src/core/magnetostatics/dp3m.hpp index cdbea9e8ac1..0bdf4f05ec2 100644 --- a/src/core/magnetostatics/dp3m.hpp +++ b/src/core/magnetostatics/dp3m.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -39,7 +39,6 @@ #include "p3m/common.hpp" #include "p3m/data_struct.hpp" -#include "p3m/fft.hpp" #include "p3m/interpolation.hpp" #include "p3m/send_mesh.hpp" @@ -52,6 +51,7 @@ #include #include #include +#include #include #ifdef NPT @@ -59,42 +59,28 @@ void npt_add_virial_magnetic_contribution(double energy); #endif -struct dp3m_data_struct : public p3m_data_struct_base { - explicit dp3m_data_struct(P3MParameters &¶meters) - : p3m_data_struct_base{std::move(parameters)} {} - - /** local mesh. */ - P3MLocalMesh local_mesh; - /** real space mesh (local) for CA/FFT. */ - fft_vector rs_mesh; - /** real space mesh (local) for CA/FFT of the dipolar field. */ - std::array, 3> rs_mesh_dip; - /** k-space mesh (local) for k-space calculation and FFT. */ - std::vector ks_mesh; - - /** number of dipolar particles (only on head node). */ - int sum_dip_part = 0; - /** Sum of square of magnetic dipoles (only on head node). */ - double sum_mu2 = 0.; - - /** position shift for calculation of first assignment mesh point. */ - double pos_shift = 0.; +/** @brief Dipolar P3M solver. */ +struct DipolarP3M : public Dipoles::Actor { + struct p3m_data_struct_impl : public p3m_data_struct { + explicit p3m_data_struct_impl(P3MParameters &¶meters) + : p3m_data_struct{std::move(parameters)} {} - p3m_interpolation_cache inter_weights; + /** number of dipolar particles (only on head node). */ + int sum_dip_part = 0; + /** Sum of square of magnetic dipoles (only on head node). */ + double sum_mu2 = 0.; - /** send/recv mesh sizes */ - p3m_send_mesh sm; + /** position shift for calculation of first assignment mesh point. */ + double pos_shift = 0.; - /** cached k-space self-energy correction */ - double energy_correction = 0.; + p3m_interpolation_cache inter_weights; - fft_data_struct fft; -}; + /** cached k-space self-energy correction */ + double energy_correction = 0.; + }; -/** @brief Dipolar P3M solver. */ -struct DipolarP3M : public Dipoles::Actor { /** Dipolar P3M parameters. */ - dp3m_data_struct dp3m; + p3m_data_struct_impl dp3m; /** Magnetostatics prefactor. */ int tune_timings; diff --git a/src/core/p3m/CMakeLists.txt b/src/core/p3m/CMakeLists.txt index f05697c8378..2a6dd407baf 100644 --- a/src/core/p3m/CMakeLists.txt +++ b/src/core/p3m/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2022 The ESPResSo project +# Copyright (C) 2018-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,13 +17,5 @@ # along with this program. If not, see . # -if(FFTW3_FOUND) - target_link_libraries(espresso_core PUBLIC FFTW3::FFTW3) -endif() - -target_sources( - espresso_core - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/TuningAlgorithm.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/send_mesh.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/fft.cpp) +target_sources(espresso_core PRIVATE common.cpp send_mesh.cpp + TuningAlgorithm.cpp FFTBackendLegacy.cpp) diff --git a/src/core/p3m/FFTBackendLegacy.cpp b/src/core/p3m/FFTBackendLegacy.cpp new file mode 100644 index 00000000000..76fcc7f021d --- /dev/null +++ b/src/core/p3m/FFTBackendLegacy.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "config/config.hpp" + +#if defined(P3M) or defined(DP3M) + +#include "FFTBackendLegacy.hpp" + +#include "communication.hpp" + +#include "fft/fft.hpp" + +#include + +#include +#include +#include +#include + +FFTBackendLegacy::FFTBackendLegacy(p3m_data_struct &obj) + : FFTBackend(obj), + fft{std::make_unique( + ::Communication::mpiCallbacksHandle()->share_mpi_env())} {} + +FFTBackendLegacy::~FFTBackendLegacy() = default; + +void FFTBackendLegacy::update_mesh_data() { + auto const mesh_size_ptr = fft->get_mesh_size(); + auto const mesh_start_ptr = fft->get_mesh_start(); + for (auto i = 0u; i < 3u; ++i) { + mesh.size[i] = mesh_size_ptr[i]; + mesh.start[i] = mesh_start_ptr[i]; + } + mesh.stop = mesh.start + mesh.size; + mesh.ks_scalar = std::span(ks_mesh); + mesh.rs_scalar = std::span(rs_mesh); + for (auto i = 0u; i < 3u; ++i) { + mesh.rs_fields[i] = std::span(rs_mesh_fields[i]); + } +} + +void FFTBackendLegacy::init_fft() { + mesh_comm.resize(::comm_cart, local_mesh); + auto ca_mesh_size = fft->initialize_fft( + ::comm_cart, local_mesh.dim, local_mesh.margin, params.mesh, + params.mesh_off, mesh.ks_pnum, ::communicator.node_grid); + rs_mesh.resize(ca_mesh_size); + if (dipolar) { + ks_mesh.resize(ca_mesh_size); + } + for (auto &rs_mesh_field : rs_mesh_fields) { + rs_mesh_field.resize(ca_mesh_size); + } + update_mesh_data(); +} + +void FFTBackendLegacy::perform_field_back_fft() { + /* Back FFT force component mesh */ + for (auto &rs_mesh_field : rs_mesh_fields) { + fft->backward_fft(::comm_cart, rs_mesh_field.data(), + check_complex_residuals); + } + /* redistribute force component mesh */ + std::array meshes = {{rs_mesh_fields[0u].data(), + rs_mesh_fields[1u].data(), + rs_mesh_fields[2u].data()}}; + mesh_comm.spread_grid(::comm_cart, meshes, local_mesh.dim); +} + +void FFTBackendLegacy::perform_fwd_fft() { + if (dipolar) { + std::array meshes = {{rs_mesh_fields[0u].data(), + rs_mesh_fields[1u].data(), + rs_mesh_fields[2u].data()}}; + mesh_comm.gather_grid(::comm_cart, meshes, local_mesh.dim); + for (auto &rs_mesh_field : rs_mesh_fields) { + fft->forward_fft(::comm_cart, rs_mesh_field.data()); + } + } else { + mesh_comm.gather_grid(::comm_cart, rs_mesh.data(), local_mesh.dim); + fft->forward_fft(::comm_cart, rs_mesh.data()); + } + update_mesh_data(); +} + +void FFTBackendLegacy::perform_space_back_fft() { + /* Back FFT force component mesh */ + fft->backward_fft(::comm_cart, rs_mesh.data(), check_complex_residuals); + /* redistribute force component mesh */ + mesh_comm.spread_grid(::comm_cart, rs_mesh.data(), local_mesh.dim); +} + +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/FFTBackendLegacy.hpp b/src/core/p3m/FFTBackendLegacy.hpp new file mode 100644 index 00000000000..3b4ecb41c7d --- /dev/null +++ b/src/core/p3m/FFTBackendLegacy.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include "config/config.hpp" + +#if defined(P3M) or defined(DP3M) + +#include "common.hpp" +#include "data_struct.hpp" +#include "send_mesh.hpp" + +#include "fft/vector.hpp" + +#include +#include +#include + +namespace fft { +struct fft_data_struct; +} // namespace fft + +/** + * @brief Historic FFT backend based on FFTW3. + * The 3D FFT is split into three 1D FFTs. + */ +class FFTBackendLegacy : public FFTBackend { + std::unique_ptr fft; + /** @brief k-space mesh (local) for k-space calculations. */ + std::vector ks_mesh; + /** @brief real-space mesh (local) for CA/FFT. */ + fft::vector rs_mesh; + /** @brief real-space mesh (local) for the electric or dipolar field. */ + std::array, 3> rs_mesh_fields; + p3m_send_mesh mesh_comm; + +public: + explicit FFTBackendLegacy(p3m_data_struct &obj); + ~FFTBackendLegacy() override; + void init_fft() override; + void perform_fwd_fft() override; + void perform_field_back_fft() override; + void perform_space_back_fft() override; + void update_mesh_data(); + + /** + * @brief Index helpers for reciprocal space. + * After the FFT the data is in order YZX, which + * means that Y is the slowest changing index. + */ + std::tuple get_permutations() const override { + constexpr static int KX = 2; + constexpr static int KY = 0; + constexpr static int KZ = 1; + return {KX, KY, KZ}; + } +}; + +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/common.cpp b/src/core/p3m/common.cpp index fb8023eba74..6290c46d6ab 100644 --- a/src/core/p3m/common.cpp +++ b/src/core/p3m/common.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -21,7 +21,7 @@ #include "config/config.hpp" -#if defined(P3M) || defined(DP3M) +#if defined(P3M) or defined(DP3M) #include "common.hpp" @@ -105,4 +105,4 @@ void P3MLocalMesh::calc_local_ca_mesh(P3MParameters const ¶ms, q_21_off = dim[2] * (dim[1] - params.cao); } -#endif /* defined(P3M) || defined(DP3M) */ +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp index 6bc30828ee5..af9a9af5959 100644 --- a/src/core/p3m/common.hpp +++ b/src/core/p3m/common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -45,27 +45,15 @@ /** This value indicates metallic boundary conditions. */ auto constexpr P3M_EPSILON_METALLIC = 0.0; -#if defined(P3M) || defined(DP3M) +#if defined(P3M) or defined(DP3M) #include "LocalBox.hpp" -#include #include +#include #include -#include - -namespace detail { -/** @brief Index helpers for direct and reciprocal space. - * After the FFT the data is in order YZX, which - * means that Y is the slowest changing index. - */ -namespace FFT_indexing { -enum FFT_REAL_VECTOR : int { RX = 0, RY = 1, RZ = 2 }; -enum FFT_WAVE_VECTOR : int { KY = 0, KZ = 1, KX = 2 }; -} // namespace FFT_indexing -} // namespace detail -/** Structure to hold P3M parameters and some dependent variables. */ +/** @brief Structure to hold P3M parameters and some dependent variables. */ struct P3MParameters { /** tuning or production? */ bool tuning; @@ -181,9 +169,8 @@ struct P3MParameters { } }; -/** Structure for local mesh parameters. */ +/** @brief Properties of the local mesh. */ struct P3MLocalMesh { - /* local mesh characterization. */ /** dimension (size) of local mesh. */ Utils::Vector3i dim; /** number of local mesh points. */ @@ -228,7 +215,27 @@ struct P3MLocalMesh { double space_layer); }; -#endif /* P3M || DP3M */ +/** @brief Local mesh FFT buffers. */ +struct P3MFFTMesh { + /** @brief k-space scalar mesh for k-space calculations. */ + std::span ks_scalar; + /** @brief real-space scalar mesh for charge assignment and FFT. */ + std::span rs_scalar; + /** @brief real-space 3D meshes for the electric or dipolar field. */ + std::array, 3> rs_fields; + + /** @brief Indices of the lower left corner of the local mesh grid. */ + Utils::Vector3i start; + /** @brief Indices of the upper right corner of the local mesh grid. */ + Utils::Vector3i stop; + /** @brief Extents of the local mesh grid. */ + Utils::Vector3i size; + + /** @brief number of permutations in k_space */ + int ks_pnum = 0; +}; + +#endif // defined(P3M) or defined(DP3M) namespace detail { /** Calculate indices that shift @ref P3MParameters::mesh "mesh" by `mesh/2`. diff --git a/src/core/p3m/data_struct.hpp b/src/core/p3m/data_struct.hpp index 410cffbbf94..3e0a9a1614a 100644 --- a/src/core/p3m/data_struct.hpp +++ b/src/core/p3m/data_struct.hpp @@ -1,6 +1,6 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002-2010 + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * * This file is part of ESPResSo. @@ -23,20 +23,38 @@ #include "config/config.hpp" -#if defined(P3M) || defined(DP3M) +#if defined(P3M) or defined(DP3M) #include "common.hpp" #include +#include +#include +#include +#include #include -struct p3m_data_struct_base { - explicit p3m_data_struct_base(P3MParameters &¶meters) - : params{std::move(parameters)}, ks_pnum{0} {} +class FFTBackend; +/** + * @brief Base class for the electrostatics and magnetostatics P3M algorithms. + * Contains a handle to the FFT backend, information about the local mesh, + * the differential operator, and various buffers. + */ +struct p3m_data_struct { + explicit p3m_data_struct(P3MParameters &¶meters) + : params{std::move(parameters)} {} + + /** @brief P3M base parameters. */ P3MParameters params; + /** @brief Local mesh properties. */ + P3MLocalMesh local_mesh; + /** @brief Local mesh FFT buffers. */ + P3MFFTMesh mesh; - /** Spatial differential operator in k-space. We use an i*k differentiation. + /** + * @brief Spatial differential operator in k-space. + * We use an i*k differentiation. */ std::array, 3> d_op; /** Force optimised influence function (k-space) */ @@ -44,8 +62,8 @@ struct p3m_data_struct_base { /** Energy optimised influence function (k-space) */ std::vector g_energy; - /** number of permutations in k_space */ - int ks_pnum; + /** FFT backend. */ + std::unique_ptr fft; /** Calculate the Fourier transformed differential operator. * Remark: This is done on the level of n-vectors and not k-vectors, @@ -54,6 +72,50 @@ struct p3m_data_struct_base { void calc_differential_operator() { d_op = detail::calc_meshift(params.mesh, true); } + + template void make_fft_instance(Args... args) { + assert(fft == nullptr); + fft = std::make_unique(*this, args...); + } + + inline void set_dipolar_mode(); }; -#endif +/** + * @brief API for the FFT backend of the P3M algorithm. + * Any FFT backend must implement this interface. + * The backend can read some members of @ref p3m_data_struct + * but can only modify the FFT buffers in @ref P3MFFTMesh. + */ +class FFTBackend { +protected: + P3MParameters const ¶ms; + P3MLocalMesh const &local_mesh; + P3MFFTMesh &mesh; + +public: + bool dipolar = false; + bool check_complex_residuals = false; + explicit FFTBackend(p3m_data_struct &obj) + : params{obj.params}, local_mesh{obj.local_mesh}, mesh{obj.mesh} {} + virtual ~FFTBackend() = default; + /** @brief Initialize the FFT plans and buffers. */ + virtual void init_fft() = 0; + /** @brief Carry out the forward FFT of the scalar or field meshes. */ + virtual void perform_fwd_fft() = 0; + /** @brief Carry out the backward FFT of the scalar mesh. */ + virtual void perform_field_back_fft() = 0; + /** @brief Carry out the backward FFT of the field meshes. */ + virtual void perform_space_back_fft() = 0; + /** @brief Get indices of the k-space data layout. */ + virtual std::tuple get_permutations() const = 0; +}; + +void p3m_data_struct::set_dipolar_mode() { + assert(g_energy.empty() and g_force.empty()); + assert(d_op[0u].empty() and d_op[1u].empty() and d_op[2u].empty()); + assert(not fft->dipolar); + fft->dipolar = true; +} + +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/for_each_3d.hpp b/src/core/p3m/for_each_3d.hpp index 9f44871bd54..cc4bb2553d0 100644 --- a/src/core/p3m/for_each_3d.hpp +++ b/src/core/p3m/for_each_3d.hpp @@ -20,6 +20,7 @@ #pragma once #include +#include namespace detail { diff --git a/src/core/p3m/influence_function.hpp b/src/core/p3m/influence_function.hpp index 24853d43e25..eb259d89d92 100644 --- a/src/core/p3m/influence_function.hpp +++ b/src/core/p3m/influence_function.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 The ESPResSo project + * Copyright (C) 2019-2024 The ESPResSo project * * This file is part of ESPResSo. * @@ -47,7 +47,7 @@ * * @param cao Charge assignment order. * @param alpha Ewald splitting parameter. - * @param k k Vector to evaluate the function for. + * @param k k-vector to evaluate the function for. * @param h Grid spacing. */ template @@ -74,7 +74,6 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k, for_each_3d( m_start, m_stop, indices, [&]() { - using namespace detail::FFT_indexing; auto const U2 = std::pow(Utils::product(fnm), 2 * cao); auto const km2 = km.norm2(); auto const exponent = exponent_prefactor * km2; @@ -102,17 +101,22 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k, * 1 for electric field... * @tparam m Number of aliasing terms to take into account. * - * @param params P3M parameters - * @param n_start Lower left corner of the grid + * @param params P3M parameters. + * @param n_start Lower left corner of the grid. * @param n_stop Upper right corner of the grid. - * @param inv_box_l Inverse box length + * @param KX k-space x-axis index. + * @param KY k-space y-axis index. + * @param KZ k-space z-axis index. + * @param inv_box_l Inverse box length. * @return Values of G_opt at regular grid points. */ template -std::vector grid_influence_function(const P3MParameters ¶ms, - const Utils::Vector3i &n_start, - const Utils::Vector3i &n_stop, - const Utils::Vector3d &inv_box_l) { +std::vector grid_influence_function(P3MParameters const ¶ms, + Utils::Vector3i const &n_start, + Utils::Vector3i const &n_stop, + int const KX, int const KY, + int const KZ, + Utils::Vector3d const &inv_box_l) { auto const shifts = detail::calc_meshift(params.mesh); auto const size = n_stop - n_start; @@ -132,14 +136,13 @@ std::vector grid_influence_function(const P3MParameters ¶ms, auto index = std::size_t(0u); for_each_3d(n_start, n_stop, indices, [&]() { - using namespace detail::FFT_indexing; - if ((indices[KX] % half_mesh[RX] != 0) or - (indices[KY] % half_mesh[RY] != 0) or - (indices[KZ] % half_mesh[RZ] != 0)) { + if ((indices[KX] % half_mesh[0u] != 0) or + (indices[KY] % half_mesh[1u] != 0) or + (indices[KZ] % half_mesh[2u] != 0)) { auto const k = - Utils::Vector3d{{shifts[RX][indices[KX]] * wavevector[RX], - shifts[RY][indices[KY]] * wavevector[RY], - shifts[RZ][indices[KZ]] * wavevector[RZ]}}; + Utils::Vector3d{{shifts[0u][indices[KX]] * wavevector[0u], + shifts[1u][indices[KY]] * wavevector[1u], + shifts[2u][indices[KZ]] * wavevector[2u]}}; g[index] = G_opt(params.cao, params.alpha, k, params.a); } ++index; diff --git a/src/core/p3m/interpolation.hpp b/src/core/p3m/interpolation.hpp index 355307b61f4..8aa6b76525e 100644 --- a/src/core/p3m/interpolation.hpp +++ b/src/core/p3m/interpolation.hpp @@ -18,8 +18,8 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef ESPRESSO_CORE_P3M_INTERPOLATION_HPP -#define ESPRESSO_CORE_P3M_INTERPOLATION_HPP + +#pragma once #include #include @@ -206,5 +206,3 @@ void p3m_interpolate(P3MLocalMesh const &local_mesh, q_ind += local_mesh.q_21_off; } } - -#endif diff --git a/src/core/p3m/send_mesh.cpp b/src/core/p3m/send_mesh.cpp index 5eea122b69a..9bb377a7688 100644 --- a/src/core/p3m/send_mesh.cpp +++ b/src/core/p3m/send_mesh.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -18,12 +18,13 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ + #include "config/config.hpp" -#if defined(P3M) || defined(DP3M) +#if defined(P3M) or defined(DP3M) +#include "fft/fft.hpp" #include "p3m/common.hpp" -#include "p3m/fft.hpp" #include "p3m/send_mesh.hpp" #include @@ -70,8 +71,8 @@ static void p3m_add_block(double const *in, double *out, int const start[3], } } -void p3m_send_mesh::resize(const boost::mpi::communicator &comm, - const P3MLocalMesh &local_mesh) { +void p3m_send_mesh::resize(boost::mpi::communicator const &comm, + P3MLocalMesh const &local_mesh) { int done[3] = {0, 0, 0}; /* send grids */ for (int i = 0; i < 3; i++) { @@ -145,9 +146,9 @@ void p3m_send_mesh::resize(const boost::mpi::communicator &comm, } } -void p3m_send_mesh::gather_grid(std::span meshes, - const boost::mpi::communicator &comm, - const Utils::Vector3i &dim) { +void p3m_send_mesh::gather_grid(boost::mpi::communicator const &comm, + std::span meshes, + Utils::Vector3i const &dim) { auto const node_neighbors = Utils::Mpi::cart_neighbors<3>(comm); send_grid.resize(max * meshes.size()); recv_grid.resize(max * meshes.size()); @@ -159,8 +160,8 @@ void p3m_send_mesh::gather_grid(std::span meshes, /* pack send block */ if (s_size[s_dir] > 0) for (std::size_t i = 0; i < meshes.size(); i++) { - fft_pack_block(meshes[i], send_grid.data() + i * s_size[s_dir], - s_ld[s_dir], s_dim[s_dir], dim.data(), 1); + fft::fft_pack_block(meshes[i], send_grid.data() + i * s_size[s_dir], + s_ld[s_dir], s_dim[s_dir], dim.data(), 1); } /* communication */ @@ -183,9 +184,9 @@ void p3m_send_mesh::gather_grid(std::span meshes, } } -void p3m_send_mesh::spread_grid(std::span meshes, - const boost::mpi::communicator &comm, - const Utils::Vector3i &dim) { +void p3m_send_mesh::spread_grid(boost::mpi::communicator const &comm, + std::span meshes, + Utils::Vector3i const &dim) { auto const node_neighbors = Utils::Mpi::cart_neighbors<3>(comm); send_grid.resize(max * meshes.size()); recv_grid.resize(max * meshes.size()); @@ -197,8 +198,8 @@ void p3m_send_mesh::spread_grid(std::span meshes, /* pack send block */ if (r_size[r_dir] > 0) for (std::size_t i = 0; i < meshes.size(); i++) { - fft_pack_block(meshes[i], send_grid.data() + i * r_size[r_dir], - r_ld[r_dir], r_dim[r_dir], dim.data(), 1); + fft::fft_pack_block(meshes[i], send_grid.data() + i * r_size[r_dir], + r_ld[r_dir], r_dim[r_dir], dim.data(), 1); } /* communication */ if (node_neighbors[r_dir] != comm.rank()) { @@ -213,11 +214,11 @@ void p3m_send_mesh::spread_grid(std::span meshes, /* un pack recv block */ if (s_size[s_dir] > 0) { for (std::size_t i = 0; i < meshes.size(); i++) { - fft_unpack_block(recv_grid.data() + i * s_size[s_dir], meshes[i], - s_ld[s_dir], s_dim[s_dir], dim.data(), 1); + fft::fft_unpack_block(recv_grid.data() + i * s_size[s_dir], meshes[i], + s_ld[s_dir], s_dim[s_dir], dim.data(), 1); } } } } -#endif +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/send_mesh.hpp b/src/core/p3m/send_mesh.hpp index 1ccffcd91dc..8fa73435880 100644 --- a/src/core/p3m/send_mesh.hpp +++ b/src/core/p3m/send_mesh.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2010-2024 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -18,12 +18,12 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef ESPRESSO_CORE_P3M_SEND_MESH_HPP -#define ESPRESSO_CORE_P3M_SEND_MESH_HPP + +#pragma once #include "config/config.hpp" -#if defined(P3M) || defined(DP3M) +#if defined(P3M) or defined(DP3M) #include "p3m/common.hpp" @@ -66,22 +66,20 @@ class p3m_send_mesh { std::vector recv_grid; public: - void resize(const boost::mpi::communicator &comm, - const P3MLocalMesh &local_mesh); - void gather_grid(std::span meshes, - const boost::mpi::communicator &comm, - const Utils::Vector3i &dim); - void gather_grid(double *mesh, const boost::mpi::communicator &comm, - const Utils::Vector3i &dim) { - gather_grid(std::span(&mesh, 1u), comm, dim); + void resize(boost::mpi::communicator const &comm, + P3MLocalMesh const &local_mesh); + void gather_grid(boost::mpi::communicator const &comm, + std::span meshes, Utils::Vector3i const &dim); + void gather_grid(boost::mpi::communicator const &comm, double *mesh, + Utils::Vector3i const &dim) { + gather_grid(comm, std::span(&mesh, 1u), dim); } - void spread_grid(std::span meshes, - const boost::mpi::communicator &comm, - const Utils::Vector3i &dim); - void spread_grid(double *mesh, const boost::mpi::communicator &comm, - const Utils::Vector3i &dim) { - spread_grid(std::span(&mesh, 1u), comm, dim); + void spread_grid(boost::mpi::communicator const &comm, + std::span meshes, Utils::Vector3i const &dim); + void spread_grid(boost::mpi::communicator const &comm, double *mesh, + Utils::Vector3i const &dim) { + spread_grid(comm, std::span(&mesh, 1u), dim); } }; -#endif -#endif + +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp index f917c8cc0fc..e0124cdc18e 100644 --- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp +++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp @@ -49,6 +49,7 @@ namespace utf = boost::unit_test; #include "nonbonded_interactions/nonbonded_interaction_data.hpp" #include "observables/ParticleVelocities.hpp" #include "observables/PidObservable.hpp" +#include "p3m/FFTBackendLegacy.hpp" #include "particle_node.hpp" #include "system/System.hpp" @@ -313,6 +314,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { 1e-3}; auto solver = std::make_shared(std::move(p3m), prefactor, 1, false, true); + solver->p3m.make_fft_instance(); add_actor(comm, espresso::system, system.coulomb.impl->solver, solver, [&system]() { system.on_coulomb_change(); }); diff --git a/src/core/unit_tests/fft_test.cpp b/src/core/unit_tests/fft_test.cpp index 6e301a48da2..94cfc33f7fc 100644 --- a/src/core/unit_tests/fft_test.cpp +++ b/src/core/unit_tests/fft_test.cpp @@ -22,11 +22,12 @@ #include "config/config.hpp" -#if defined(P3M) || defined(DP3M) +#if defined(P3M) or defined(DP3M) #include -#include "p3m/fft.hpp" +#include "fft/fft.hpp" +#include "fft/vector.hpp" #include "p3m/for_each_3d.hpp" #include @@ -39,13 +40,8 @@ #include #include -std::optional> find_comm_groups(Utils::Vector3i const &, - Utils::Vector3i const &, - std::span, - std::span, std::span, - std::span, int); - BOOST_AUTO_TEST_CASE(fft_find_comm_groups_mismatch) { + using fft::find_comm_groups; int my_pos[3] = {0}; int nodelist[4] = {0}; int nodepos[12] = {0}; @@ -68,6 +64,7 @@ BOOST_AUTO_TEST_CASE(fft_find_comm_groups_mismatch) { } BOOST_AUTO_TEST_CASE(fft_map_grid) { + using fft::map_3don2d_grid; { auto g3d = Utils::Vector3i{{3, 2, 1}}; auto g2d = Utils::Vector3i{{3, 2, 1}}; @@ -161,7 +158,7 @@ BOOST_AUTO_TEST_CASE(fft_map_grid) { BOOST_AUTO_TEST_CASE(fft_exceptions) { auto constexpr size_max = std::numeric_limits::max(); auto constexpr bad_size = size_max / sizeof(int) + 1ul; - fft_allocator allocator{}; + fft::allocator allocator{}; BOOST_CHECK_EQUAL(allocator.allocate(0ul), nullptr); BOOST_CHECK_THROW(allocator.allocate(bad_size), std::bad_array_new_length); } @@ -203,6 +200,6 @@ BOOST_AUTO_TEST_CASE(for_each_3d_test) { } } -#else // defined(P3M) || defined(DP3M) +#else // defined(P3M) or defined(DP3M) int main(int argc, char **argv) {} -#endif // defined(P3M) || defined(DP3M) +#endif // defined(P3M) or defined(DP3M) diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index 38fc376835c..1d221502870 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -26,6 +26,7 @@ #include "Actor.hpp" #include "core/electrostatics/p3m.hpp" +#include "core/p3m/FFTBackendLegacy.hpp" #include "script_interface/get_value.hpp" @@ -88,6 +89,7 @@ class CoulombP3M : public Actor { std::move(p3m), get_value(params, "prefactor"), get_value(params, "timings"), get_value(params, "verbose"), get_value(params, "check_complex_residuals")); + m_actor->p3m.make_fft_instance(); }); set_charge_neutrality_tolerance(params); } diff --git a/src/script_interface/electrostatics/CoulombP3MGPU.hpp b/src/script_interface/electrostatics/CoulombP3MGPU.hpp index 95ce143014c..fbb00e39c44 100644 --- a/src/script_interface/electrostatics/CoulombP3MGPU.hpp +++ b/src/script_interface/electrostatics/CoulombP3MGPU.hpp @@ -27,6 +27,7 @@ #include "Actor.hpp" #include "core/electrostatics/p3m_gpu.hpp" +#include "core/p3m/FFTBackendLegacy.hpp" #include "script_interface/get_value.hpp" @@ -89,6 +90,7 @@ class CoulombP3MGPU : public Actor { std::move(p3m), get_value(params, "prefactor"), get_value(params, "timings"), get_value(params, "verbose"), get_value(params, "check_complex_residuals")); + m_actor->p3m.make_fft_instance(); // for CPU part }); set_charge_neutrality_tolerance(params); } diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp index d37733d75d6..77913690623 100644 --- a/src/script_interface/magnetostatics/DipolarP3M.hpp +++ b/src/script_interface/magnetostatics/DipolarP3M.hpp @@ -26,6 +26,7 @@ #include "Actor.hpp" #include "core/magnetostatics/dp3m.hpp" +#include "core/p3m/FFTBackendLegacy.hpp" #include "script_interface/get_value.hpp" @@ -85,6 +86,8 @@ class DipolarP3M : public Actor { std::move(p3m), get_value(params, "prefactor"), get_value(params, "timings"), get_value(params, "verbose")); + m_actor->dp3m.make_fft_instance(); + m_actor->dp3m.set_dipolar_mode(); }); } }; From 2adf306ac651703cb6bf1b6c3943297cbf86fbc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 4 Jul 2024 12:11:55 +0200 Subject: [PATCH 2/2] Hide FFT dipolar flag --- src/core/p3m/FFTBackendLegacy.cpp | 4 ++-- src/core/p3m/FFTBackendLegacy.hpp | 3 ++- src/core/p3m/data_struct.hpp | 10 ---------- src/core/unit_tests/EspressoSystemStandAlone_test.cpp | 2 +- src/script_interface/electrostatics/CoulombP3M.hpp | 2 +- src/script_interface/electrostatics/CoulombP3MGPU.hpp | 2 +- src/script_interface/magnetostatics/DipolarP3M.hpp | 3 +-- 7 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/core/p3m/FFTBackendLegacy.cpp b/src/core/p3m/FFTBackendLegacy.cpp index 76fcc7f021d..146c241de19 100644 --- a/src/core/p3m/FFTBackendLegacy.cpp +++ b/src/core/p3m/FFTBackendLegacy.cpp @@ -36,8 +36,8 @@ #include #include -FFTBackendLegacy::FFTBackendLegacy(p3m_data_struct &obj) - : FFTBackend(obj), +FFTBackendLegacy::FFTBackendLegacy(p3m_data_struct &obj, bool dipolar) + : FFTBackend(obj), dipolar{dipolar}, fft{std::make_unique( ::Communication::mpiCallbacksHandle()->share_mpi_env())} {} diff --git a/src/core/p3m/FFTBackendLegacy.hpp b/src/core/p3m/FFTBackendLegacy.hpp index 3b4ecb41c7d..2ae1a379aef 100644 --- a/src/core/p3m/FFTBackendLegacy.hpp +++ b/src/core/p3m/FFTBackendLegacy.hpp @@ -44,6 +44,7 @@ struct fft_data_struct; * The 3D FFT is split into three 1D FFTs. */ class FFTBackendLegacy : public FFTBackend { + bool dipolar; std::unique_ptr fft; /** @brief k-space mesh (local) for k-space calculations. */ std::vector ks_mesh; @@ -54,7 +55,7 @@ class FFTBackendLegacy : public FFTBackend { p3m_send_mesh mesh_comm; public: - explicit FFTBackendLegacy(p3m_data_struct &obj); + explicit FFTBackendLegacy(p3m_data_struct &obj, bool dipolar); ~FFTBackendLegacy() override; void init_fft() override; void perform_fwd_fft() override; diff --git a/src/core/p3m/data_struct.hpp b/src/core/p3m/data_struct.hpp index 3e0a9a1614a..9b607031991 100644 --- a/src/core/p3m/data_struct.hpp +++ b/src/core/p3m/data_struct.hpp @@ -77,8 +77,6 @@ struct p3m_data_struct { assert(fft == nullptr); fft = std::make_unique(*this, args...); } - - inline void set_dipolar_mode(); }; /** @@ -94,7 +92,6 @@ class FFTBackend { P3MFFTMesh &mesh; public: - bool dipolar = false; bool check_complex_residuals = false; explicit FFTBackend(p3m_data_struct &obj) : params{obj.params}, local_mesh{obj.local_mesh}, mesh{obj.mesh} {} @@ -111,11 +108,4 @@ class FFTBackend { virtual std::tuple get_permutations() const = 0; }; -void p3m_data_struct::set_dipolar_mode() { - assert(g_energy.empty() and g_force.empty()); - assert(d_op[0u].empty() and d_op[1u].empty() and d_op[2u].empty()); - assert(not fft->dipolar); - fft->dipolar = true; -} - #endif // defined(P3M) or defined(DP3M) diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp index e0124cdc18e..ce9258b733c 100644 --- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp +++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp @@ -314,7 +314,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { 1e-3}; auto solver = std::make_shared(std::move(p3m), prefactor, 1, false, true); - solver->p3m.make_fft_instance(); + solver->p3m.make_fft_instance(false); add_actor(comm, espresso::system, system.coulomb.impl->solver, solver, [&system]() { system.on_coulomb_change(); }); diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index 1d221502870..34ea03c20e1 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -89,7 +89,7 @@ class CoulombP3M : public Actor { std::move(p3m), get_value(params, "prefactor"), get_value(params, "timings"), get_value(params, "verbose"), get_value(params, "check_complex_residuals")); - m_actor->p3m.make_fft_instance(); + m_actor->p3m.make_fft_instance(false); }); set_charge_neutrality_tolerance(params); } diff --git a/src/script_interface/electrostatics/CoulombP3MGPU.hpp b/src/script_interface/electrostatics/CoulombP3MGPU.hpp index fbb00e39c44..187fde4308c 100644 --- a/src/script_interface/electrostatics/CoulombP3MGPU.hpp +++ b/src/script_interface/electrostatics/CoulombP3MGPU.hpp @@ -90,7 +90,7 @@ class CoulombP3MGPU : public Actor { std::move(p3m), get_value(params, "prefactor"), get_value(params, "timings"), get_value(params, "verbose"), get_value(params, "check_complex_residuals")); - m_actor->p3m.make_fft_instance(); // for CPU part + m_actor->p3m.make_fft_instance(false); // for CPU part }); set_charge_neutrality_tolerance(params); } diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp index 77913690623..7474345e9d4 100644 --- a/src/script_interface/magnetostatics/DipolarP3M.hpp +++ b/src/script_interface/magnetostatics/DipolarP3M.hpp @@ -86,8 +86,7 @@ class DipolarP3M : public Actor { std::move(p3m), get_value(params, "prefactor"), get_value(params, "timings"), get_value(params, "verbose")); - m_actor->dp3m.make_fft_instance(); - m_actor->dp3m.set_dipolar_mode(); + m_actor->dp3m.make_fft_instance(true); }); } };