Skip to content

Commit

Permalink
Hide FFT implementation details (#4947)
Browse files Browse the repository at this point in the history
Fixes #4945

Description of changes:
- fully encapsulate the historic FFT implementation
   - electrostatics and magnetostatics now share the same FFT operations
   - FFT operations and mesh buffers are now better documented
   - all temporary mesh buffers are now private data members
- provide a type-erased API for the FFT backend
   - the P3M algorithm no longer has visibility of the FFT library
   - new FFT libraries can be used in place of the historic FFT implementation
- bugfixes:
   - the P3M algorithm no longer leaks memory when FFT plans are discarded
   - the FFT implementation now keeps the MPI environment alive until all FFT plans have been destroyed
      - avoids a race condition, since the FFT plan destructor need a MPI communicator
  • Loading branch information
kodiakhq[bot] authored Jul 4, 2024
2 parents c89fce3 + 2adf306 commit e4d63b2
Show file tree
Hide file tree
Showing 26 changed files with 927 additions and 641 deletions.
4 changes: 4 additions & 0 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ if(ESPRESSO_BUILD_WITH_WALBERLA)
$<$<BOOL:${ESPRESSO_BUILD_WITH_CUDA}>:espresso::walberla_cuda>)
endif()

if(ESPRESSO_BUILD_WITH_FFTW)
add_subdirectory(fft)
endif()

add_subdirectory(accumulators)
add_subdirectory(analysis)
add_subdirectory(bond_breakage)
Expand Down
2 changes: 1 addition & 1 deletion src/core/electrostatics/elc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,7 @@ void charge_assign(elc_data const &elc, CoulombP3M &solver,
}
/* prepare local FFT mesh */
for (int i = 0; i < solver.p3m.local_mesh.size; i++)
solver.p3m.rs_mesh[i] = 0.;
solver.p3m.mesh.rs_scalar[i] = 0.;

for (auto zipped : p_q_pos_range) {
auto const p_q = boost::get<0>(zipped);
Expand Down
148 changes: 66 additions & 82 deletions src/core/electrostatics/p3m.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2010-2022 The ESPResSo project
* Copyright (C) 2010-2024 The ESPResSo project
* Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
* Max-Planck-Institute for Polymer Research, Theory Group
*
Expand Down Expand Up @@ -33,9 +33,9 @@
#include "electrostatics/p3m_gpu.hpp"
#include "electrostatics/p3m_gpu_error.hpp"

#include "fft/fft.hpp"
#include "p3m/TuningAlgorithm.hpp"
#include "p3m/TuningLogger.hpp"
#include "p3m/fft.hpp"
#include "p3m/for_each_3d.hpp"
#include "p3m/influence_function.hpp"
#include "p3m/math.hpp"
Expand Down Expand Up @@ -106,30 +106,27 @@ void CoulombP3M::count_charged_particles() {
/** Calculate the optimal influence function of @cite hockney88a.
* (optimised for force calculations)
*
* Each node calculates only the values for its domain in k-space
* (see fft.plan[3].mesh and fft.plan[3].start).
* Each node calculates only the values for its domain in k-space.
*
* See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat
* different convention for the prefactors, which is described in
* @cite deserno98a @cite deserno98b.
*/
void CoulombP3M::calc_influence_function_force() {
auto const start = Utils::Vector3i{p3m.fft.plan[3].start};
auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh};

p3m.g_force = grid_influence_function<1>(p3m.params, start, start + size,
get_system().box_geo->length_inv());
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_force =
grid_influence_function<1>(p3m.params, p3m.mesh.start, p3m.mesh.stop, KX,
KY, KZ, get_system().box_geo->length_inv());
}

/** Calculate the influence function optimized for the energy and the
* self energy correction.
*/
void CoulombP3M::calc_influence_function_energy() {
auto const start = Utils::Vector3i{p3m.fft.plan[3].start};
auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh};

p3m.g_energy = grid_influence_function<0>(p3m.params, start, start + size,
get_system().box_geo->length_inv());
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_energy =
grid_influence_function<0>(p3m.params, p3m.mesh.start, p3m.mesh.stop, KX,
KY, KZ, get_system().box_geo->length_inv());
}

/** Aliasing sum used by @ref p3m_k_space_error. */
Expand All @@ -138,8 +135,8 @@ static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift,
Utils::Vector3d const &mesh_i, int cao,
double alpha_L_i) {

auto constexpr m_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
auto constexpr m_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
auto alias1 = 0.;
auto alias2 = 0.;
Expand All @@ -148,7 +145,7 @@ static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift,
Utils::Vector3i nm{};
Utils::Vector3d fnm{};
for_each_3d(
m_start, m_stop, indices,
mesh_start, mesh_stop, indices,
[&]() {
auto const norm_sq = nm.norm2();
auto const ex = exp(-factor1 * norm_sq);
Expand Down Expand Up @@ -202,14 +199,14 @@ static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh,
auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
auto const mesh_i = 1. / Utils::Vector3d(mesh);
auto const alpha_L_i = 1. / alpha_L;
auto const m_stop = mesh / 2;
auto const m_start = -m_stop;
auto const mesh_stop = mesh / 2;
auto const mesh_start = -mesh_stop;
auto indices = Utils::Vector3i{};
auto values = Utils::Vector3d{};
auto he_q = 0.;

for_each_3d(
m_start, m_stop, indices,
mesh_start, mesh_stop, indices,
[&]() {
if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
auto const n2 = indices.norm2();
Expand Down Expand Up @@ -253,18 +250,9 @@ void CoulombP3M::init() {
elc_layer = actor->elc.space_layer;
}

assert(p3m.fft);
p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
p3m.sm.resize(comm_cart, p3m.local_mesh);

int ca_mesh_size = fft_init(p3m.local_mesh.dim, p3m.local_mesh.margin,
p3m.params.mesh, p3m.params.mesh_off, p3m.ks_pnum,
p3m.fft, ::communicator.node_grid, comm_cart);
p3m.rs_mesh.resize(ca_mesh_size);

for (auto &e : p3m.E_mesh) {
e.resize(ca_mesh_size);
}

p3m.fft->init_fft();
p3m.calc_differential_operator();

/* fix box length dependent constants */
Expand All @@ -290,7 +278,7 @@ CoulombP3M::CoulombP3M(P3MParameters &&parameters, double prefactor,

namespace {
template <int cao> struct AssignCharge {
void operator()(p3m_data_struct &p3m, double q,
void operator()(decltype(CoulombP3M::p3m) &p3m, double q,
Utils::Vector3d const &real_pos,
p3m_interpolation_cache &inter_weights) {
auto const w = p3m_calculate_interpolation_weights<cao>(
Expand All @@ -299,21 +287,22 @@ template <int cao> struct AssignCharge {
inter_weights.store(w);

p3m_interpolate(p3m.local_mesh, w, [q, &p3m](int ind, double w) {
p3m.rs_mesh[ind] += w * q;
p3m.mesh.rs_scalar[ind] += w * q;
});
}

void operator()(p3m_data_struct &p3m, double q,
void operator()(decltype(CoulombP3M::p3m) &p3m, double q,
Utils::Vector3d const &real_pos) {
p3m_interpolate(
p3m.local_mesh,
p3m_calculate_interpolation_weights<cao>(real_pos, p3m.params.ai,
p3m.local_mesh),
[q, &p3m](int ind, double w) { p3m.rs_mesh[ind] += w * q; });
[q, &p3m](int ind, double w) { p3m.mesh.rs_scalar[ind] += w * q; });
}

template <typename combined_ranges>
void operator()(p3m_data_struct &p3m, combined_ranges const &p_q_pos_range) {
void operator()(decltype(CoulombP3M::p3m) &p3m,
combined_ranges const &p_q_pos_range) {
for (auto zipped : p_q_pos_range) {
auto const p_q = boost::get<0>(zipped);
auto const &p_pos = boost::get<1>(zipped);
Expand All @@ -330,7 +319,7 @@ void CoulombP3M::charge_assign(ParticleRange const &particles) {

/* prepare local FFT mesh */
for (int i = 0; i < p3m.local_mesh.size; i++)
p3m.rs_mesh[i] = 0.0;
p3m.mesh.rs_scalar[i] = 0.0;

auto p_q_range = ParticlePropertyRange::charge_range(particles);
auto p_pos_range = ParticlePropertyRange::pos_range(particles);
Expand All @@ -352,7 +341,7 @@ void CoulombP3M::assign_charge(double q, Utils::Vector3d const &real_pos) {

template <int cao> struct AssignForces {
template <typename combined_ranges>
void operator()(p3m_data_struct &p3m, double force_prefac,
void operator()(decltype(CoulombP3M::p3m) &p3m, double force_prefac,
combined_ranges const &p_q_force_range) const {

assert(cao == p3m.inter_weights.cao());
Expand All @@ -369,8 +358,9 @@ template <int cao> struct AssignForces {

Utils::Vector3d force{};
p3m_interpolate(p3m.local_mesh, w, [&force, &p3m](int ind, double w) {
force += w * Utils::Vector3d{p3m.E_mesh[0][ind], p3m.E_mesh[1][ind],
p3m.E_mesh[2][ind]};
force += w * Utils::Vector3d{p3m.mesh.rs_fields[0u][ind],
p3m.mesh.rs_fields[1u][ind],
p3m.mesh.rs_fields[2u][ind]};
});

p_force -= pref * force;
Expand Down Expand Up @@ -400,36 +390,35 @@ static auto calc_dipole_moment(boost::mpi::communicator const &comm,
*/
Utils::Vector9d
CoulombP3M::long_range_pressure(ParticleRange const &particles) {
using namespace detail::FFT_indexing;

auto const &box_geo = *get_system().box_geo;

Utils::Vector9d node_k_space_pressure_tensor{};

if (p3m.sum_q2 > 0.) {
charge_assign(particles);
p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim);
fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart);
p3m.fft->perform_fwd_fft();

auto constexpr m_start = Utils::Vector3i::broadcast(0);
auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
auto const &mesh_stop = p3m.mesh.size;
auto const &offset = p3m.mesh.start;
auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha);
auto const m_stop = std::span(p3m.fft.plan[3].new_mesh);
auto const offset = std::span(p3m.fft.plan[3].start);
auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
auto indices = Utils::Vector3i{};
auto index = std::size_t(0u);
auto diagonal = 0.;

for_each_3d(m_start, m_stop, indices, [&]() {
auto const kx = p3m.d_op[RX][indices[KX] + offset[KX]] * wavevector[RX];
auto const ky = p3m.d_op[RY][indices[KY] + offset[KY]] * wavevector[RY];
auto const kz = p3m.d_op[RZ][indices[KZ] + offset[KZ]] * wavevector[RZ];
for_each_3d(mesh_start, mesh_stop, indices, [&]() {
auto const shift = indices + offset;
auto const kx = p3m.d_op[0u][shift[KX]] * wavevector[0u];
auto const ky = p3m.d_op[1u][shift[KY]] * wavevector[1u];
auto const kz = p3m.d_op[2u][shift[KZ]] * wavevector[2u];
auto const norm_sq = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz);

if (norm_sq != 0.) {
auto const node_k_space_energy =
p3m.g_energy[index] * (Utils::sqr(p3m.rs_mesh[2u * index + 0u]) +
Utils::sqr(p3m.rs_mesh[2u * index + 1u]));
p3m.g_energy[index] *
(Utils::sqr(p3m.mesh.rs_scalar[2u * index + 0u]) +
Utils::sqr(p3m.mesh.rs_scalar[2u * index + 1u]));
auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq);
auto const pref = node_k_space_energy * vterm;
node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */
Expand Down Expand Up @@ -470,10 +459,7 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
system.coulomb.impl->solver)) {
charge_assign(particles);
}
/* Gather information for FFT grid inside the nodes domain (inner local
* mesh) and perform forward 3D FFT (Charge Assignment Mesh). */
p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim);
fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart);
p3m.fft->perform_fwd_fft();
}

auto p_q_range = ParticlePropertyRange::charge_range(particles);
Expand All @@ -496,45 +482,41 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
/* === k-space force calculation === */
if (force_flag) {
/* i*k differentiation */
auto constexpr m_start = Utils::Vector3i::broadcast(0);
auto const m_stop = std::span(p3m.fft.plan[3].new_mesh);
auto const offset = std::span(p3m.fft.plan[3].start);
auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
auto const &mesh_stop = p3m.mesh.size;
auto const &offset = p3m.mesh.start;
auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
auto indices = Utils::Vector3i{};
auto index = std::size_t(0u);

/* compute electric field */
// Eq. (3.49) @cite deserno00b
for_each_3d(m_start, m_stop, indices, [&]() {
auto const rho_hat = std::complex<double>(p3m.rs_mesh[2u * index + 0u],
p3m.rs_mesh[2u * index + 1u]);
for_each_3d(mesh_start, mesh_stop, indices, [&]() {
auto const rho_hat =
std::complex<double>(p3m.mesh.rs_scalar[2u * index + 0u],
p3m.mesh.rs_scalar[2u * index + 1u]);
auto const phi_hat = p3m.g_force[index] * rho_hat;

for (int d = 0; d < 3; d++) {
/* direction in r-space: */
int d_rs = (d + p3m.ks_pnum) % 3;
int d_rs = (d + p3m.mesh.ks_pnum) % 3;
/* directions */
auto const k =
p3m.d_op[d_rs][indices[d] + offset[d]] * wavevector[d_rs];

/* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */
p3m.E_mesh[d_rs][2u * index + 0u] = -k * phi_hat.imag();
p3m.E_mesh[d_rs][2u * index + 1u] = +k * phi_hat.real();
p3m.mesh.rs_fields[d_rs][2u * index + 0u] = -k * phi_hat.imag();
p3m.mesh.rs_fields[d_rs][2u * index + 1u] = +k * phi_hat.real();
}

++index;
});

/* Back FFT force component mesh */
auto const check_complex = !p3m.params.tuning and check_complex_residuals;
for (int d = 0; d < 3; d++) {
fft_perform_back(p3m.E_mesh[d].data(), check_complex, p3m.fft, comm_cart);
}

/* redistribute force component mesh */
std::array<double *, 3> E_fields = {
{p3m.E_mesh[0].data(), p3m.E_mesh[1].data(), p3m.E_mesh[2].data()}};
p3m.sm.spread_grid(E_fields, comm_cart, p3m.local_mesh.dim);
auto const check_residuals =
not p3m.params.tuning and check_complex_residuals;
p3m.fft->check_complex_residuals = check_residuals;
p3m.fft->perform_field_back_fft();
p3m.fft->check_complex_residuals = false;

auto const force_prefac = prefactor / volume;
Utils::integral_parameter<int, AssignForces, 1, 7>(
Expand All @@ -556,11 +538,13 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
/* === k-space energy calculation === */
if (energy_flag or npt_flag) {
auto node_energy = 0.;
for (int i = 0; i < p3m.fft.plan[3].new_size; i++) {
auto const mesh_length = Utils::product(p3m.mesh.size);
for (int i = 0; i < mesh_length; i++) {
// Use the energy optimized influence function for energy!
// Eq. (3.40) @cite deserno00b
node_energy += p3m.g_energy[i] * (Utils::sqr(p3m.rs_mesh[2 * i]) +
Utils::sqr(p3m.rs_mesh[2 * i + 1]));
node_energy +=
p3m.g_energy[i] * (Utils::sqr(p3m.mesh.rs_scalar[2 * i]) +
Utils::sqr(p3m.mesh.rs_scalar[2 * i + 1]));
}
node_energy /= 2. * volume;

Expand Down Expand Up @@ -596,7 +580,7 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
}

class CoulombTuningAlgorithm : public TuningAlgorithm {
p3m_data_struct &p3m;
decltype(CoulombP3M::p3m) &p3m;
double m_mesh_density_min = -1., m_mesh_density_max = -1.;
// indicates if mesh should be tuned
bool m_tune_mesh = false;
Expand All @@ -605,7 +589,7 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
P3MParameters &get_params() override { return p3m.params; }

public:
CoulombTuningAlgorithm(System::System &system, p3m_data_struct &input_p3m,
CoulombTuningAlgorithm(System::System &system, decltype(p3m) &input_p3m,
double prefactor, int timings)
: TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}

Expand Down
Loading

0 comments on commit e4d63b2

Please sign in to comment.