Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hide FFT implementation details #4947

Merged
merged 2 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ if(ESPRESSO_BUILD_WITH_WALBERLA)
$<$<BOOL:${ESPRESSO_BUILD_WITH_CUDA}>:espresso::walberla_cuda>)
endif()

if(ESPRESSO_BUILD_WITH_FFTW)
add_subdirectory(fft)
endif()

add_subdirectory(accumulators)
add_subdirectory(analysis)
add_subdirectory(bond_breakage)
Expand Down
2 changes: 1 addition & 1 deletion src/core/electrostatics/elc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,7 @@ void charge_assign(elc_data const &elc, CoulombP3M &solver,
}
/* prepare local FFT mesh */
for (int i = 0; i < solver.p3m.local_mesh.size; i++)
solver.p3m.rs_mesh[i] = 0.;
solver.p3m.mesh.rs_scalar[i] = 0.;

for (auto zipped : p_q_pos_range) {
auto const p_q = boost::get<0>(zipped);
Expand Down
148 changes: 66 additions & 82 deletions src/core/electrostatics/p3m.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2010-2022 The ESPResSo project
* Copyright (C) 2010-2024 The ESPResSo project
* Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
* Max-Planck-Institute for Polymer Research, Theory Group
*
Expand Down Expand Up @@ -33,9 +33,9 @@
#include "electrostatics/p3m_gpu.hpp"
#include "electrostatics/p3m_gpu_error.hpp"

#include "fft/fft.hpp"
#include "p3m/TuningAlgorithm.hpp"
#include "p3m/TuningLogger.hpp"
#include "p3m/fft.hpp"
#include "p3m/for_each_3d.hpp"
#include "p3m/influence_function.hpp"
#include "p3m/math.hpp"
Expand Down Expand Up @@ -106,30 +106,27 @@ void CoulombP3M::count_charged_particles() {
/** Calculate the optimal influence function of @cite hockney88a.
* (optimised for force calculations)
*
* Each node calculates only the values for its domain in k-space
* (see fft.plan[3].mesh and fft.plan[3].start).
* Each node calculates only the values for its domain in k-space.
*
* See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat
* different convention for the prefactors, which is described in
* @cite deserno98a @cite deserno98b.
*/
void CoulombP3M::calc_influence_function_force() {
auto const start = Utils::Vector3i{p3m.fft.plan[3].start};
auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh};

p3m.g_force = grid_influence_function<1>(p3m.params, start, start + size,
get_system().box_geo->length_inv());
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_force =
grid_influence_function<1>(p3m.params, p3m.mesh.start, p3m.mesh.stop, KX,
KY, KZ, get_system().box_geo->length_inv());
}

/** Calculate the influence function optimized for the energy and the
* self energy correction.
*/
void CoulombP3M::calc_influence_function_energy() {
auto const start = Utils::Vector3i{p3m.fft.plan[3].start};
auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh};

p3m.g_energy = grid_influence_function<0>(p3m.params, start, start + size,
get_system().box_geo->length_inv());
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_energy =
grid_influence_function<0>(p3m.params, p3m.mesh.start, p3m.mesh.stop, KX,
KY, KZ, get_system().box_geo->length_inv());
}

/** Aliasing sum used by @ref p3m_k_space_error. */
Expand All @@ -138,8 +135,8 @@ static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift,
Utils::Vector3d const &mesh_i, int cao,
double alpha_L_i) {

auto constexpr m_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
auto constexpr m_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
auto alias1 = 0.;
auto alias2 = 0.;
Expand All @@ -148,7 +145,7 @@ static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift,
Utils::Vector3i nm{};
Utils::Vector3d fnm{};
for_each_3d(
m_start, m_stop, indices,
mesh_start, mesh_stop, indices,
[&]() {
auto const norm_sq = nm.norm2();
auto const ex = exp(-factor1 * norm_sq);
Expand Down Expand Up @@ -202,14 +199,14 @@ static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh,
auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
auto const mesh_i = 1. / Utils::Vector3d(mesh);
auto const alpha_L_i = 1. / alpha_L;
auto const m_stop = mesh / 2;
auto const m_start = -m_stop;
auto const mesh_stop = mesh / 2;
auto const mesh_start = -mesh_stop;
auto indices = Utils::Vector3i{};
auto values = Utils::Vector3d{};
auto he_q = 0.;

for_each_3d(
m_start, m_stop, indices,
mesh_start, mesh_stop, indices,
[&]() {
if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
auto const n2 = indices.norm2();
Expand Down Expand Up @@ -253,18 +250,9 @@ void CoulombP3M::init() {
elc_layer = actor->elc.space_layer;
}

assert(p3m.fft);
p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
p3m.sm.resize(comm_cart, p3m.local_mesh);

int ca_mesh_size = fft_init(p3m.local_mesh.dim, p3m.local_mesh.margin,
p3m.params.mesh, p3m.params.mesh_off, p3m.ks_pnum,
p3m.fft, ::communicator.node_grid, comm_cart);
p3m.rs_mesh.resize(ca_mesh_size);

for (auto &e : p3m.E_mesh) {
e.resize(ca_mesh_size);
}

p3m.fft->init_fft();
p3m.calc_differential_operator();

/* fix box length dependent constants */
Expand All @@ -290,7 +278,7 @@ CoulombP3M::CoulombP3M(P3MParameters &&parameters, double prefactor,

namespace {
template <int cao> struct AssignCharge {
void operator()(p3m_data_struct &p3m, double q,
void operator()(decltype(CoulombP3M::p3m) &p3m, double q,
Utils::Vector3d const &real_pos,
p3m_interpolation_cache &inter_weights) {
auto const w = p3m_calculate_interpolation_weights<cao>(
Expand All @@ -299,21 +287,22 @@ template <int cao> struct AssignCharge {
inter_weights.store(w);

p3m_interpolate(p3m.local_mesh, w, [q, &p3m](int ind, double w) {
p3m.rs_mesh[ind] += w * q;
p3m.mesh.rs_scalar[ind] += w * q;
});
}

void operator()(p3m_data_struct &p3m, double q,
void operator()(decltype(CoulombP3M::p3m) &p3m, double q,
Utils::Vector3d const &real_pos) {
p3m_interpolate(
p3m.local_mesh,
p3m_calculate_interpolation_weights<cao>(real_pos, p3m.params.ai,
p3m.local_mesh),
[q, &p3m](int ind, double w) { p3m.rs_mesh[ind] += w * q; });
[q, &p3m](int ind, double w) { p3m.mesh.rs_scalar[ind] += w * q; });
}

template <typename combined_ranges>
void operator()(p3m_data_struct &p3m, combined_ranges const &p_q_pos_range) {
void operator()(decltype(CoulombP3M::p3m) &p3m,
combined_ranges const &p_q_pos_range) {
for (auto zipped : p_q_pos_range) {
auto const p_q = boost::get<0>(zipped);
auto const &p_pos = boost::get<1>(zipped);
Expand All @@ -330,7 +319,7 @@ void CoulombP3M::charge_assign(ParticleRange const &particles) {

/* prepare local FFT mesh */
for (int i = 0; i < p3m.local_mesh.size; i++)
p3m.rs_mesh[i] = 0.0;
p3m.mesh.rs_scalar[i] = 0.0;

auto p_q_range = ParticlePropertyRange::charge_range(particles);
auto p_pos_range = ParticlePropertyRange::pos_range(particles);
Expand All @@ -352,7 +341,7 @@ void CoulombP3M::assign_charge(double q, Utils::Vector3d const &real_pos) {

template <int cao> struct AssignForces {
template <typename combined_ranges>
void operator()(p3m_data_struct &p3m, double force_prefac,
void operator()(decltype(CoulombP3M::p3m) &p3m, double force_prefac,
combined_ranges const &p_q_force_range) const {

assert(cao == p3m.inter_weights.cao());
Expand All @@ -369,8 +358,9 @@ template <int cao> struct AssignForces {

Utils::Vector3d force{};
p3m_interpolate(p3m.local_mesh, w, [&force, &p3m](int ind, double w) {
force += w * Utils::Vector3d{p3m.E_mesh[0][ind], p3m.E_mesh[1][ind],
p3m.E_mesh[2][ind]};
force += w * Utils::Vector3d{p3m.mesh.rs_fields[0u][ind],
p3m.mesh.rs_fields[1u][ind],
p3m.mesh.rs_fields[2u][ind]};
});

p_force -= pref * force;
Expand Down Expand Up @@ -400,36 +390,35 @@ static auto calc_dipole_moment(boost::mpi::communicator const &comm,
*/
Utils::Vector9d
CoulombP3M::long_range_pressure(ParticleRange const &particles) {
using namespace detail::FFT_indexing;

auto const &box_geo = *get_system().box_geo;

Utils::Vector9d node_k_space_pressure_tensor{};

if (p3m.sum_q2 > 0.) {
charge_assign(particles);
p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim);
fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart);
p3m.fft->perform_fwd_fft();

auto constexpr m_start = Utils::Vector3i::broadcast(0);
auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
auto const &mesh_stop = p3m.mesh.size;
auto const &offset = p3m.mesh.start;
auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha);
auto const m_stop = std::span(p3m.fft.plan[3].new_mesh);
auto const offset = std::span(p3m.fft.plan[3].start);
auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
auto indices = Utils::Vector3i{};
auto index = std::size_t(0u);
auto diagonal = 0.;

for_each_3d(m_start, m_stop, indices, [&]() {
auto const kx = p3m.d_op[RX][indices[KX] + offset[KX]] * wavevector[RX];
auto const ky = p3m.d_op[RY][indices[KY] + offset[KY]] * wavevector[RY];
auto const kz = p3m.d_op[RZ][indices[KZ] + offset[KZ]] * wavevector[RZ];
for_each_3d(mesh_start, mesh_stop, indices, [&]() {
auto const shift = indices + offset;
auto const kx = p3m.d_op[0u][shift[KX]] * wavevector[0u];
auto const ky = p3m.d_op[1u][shift[KY]] * wavevector[1u];
auto const kz = p3m.d_op[2u][shift[KZ]] * wavevector[2u];
auto const norm_sq = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz);

if (norm_sq != 0.) {
auto const node_k_space_energy =
p3m.g_energy[index] * (Utils::sqr(p3m.rs_mesh[2u * index + 0u]) +
Utils::sqr(p3m.rs_mesh[2u * index + 1u]));
p3m.g_energy[index] *
(Utils::sqr(p3m.mesh.rs_scalar[2u * index + 0u]) +
Utils::sqr(p3m.mesh.rs_scalar[2u * index + 1u]));
auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq);
auto const pref = node_k_space_energy * vterm;
node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */
Expand Down Expand Up @@ -470,10 +459,7 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
system.coulomb.impl->solver)) {
charge_assign(particles);
}
/* Gather information for FFT grid inside the nodes domain (inner local
* mesh) and perform forward 3D FFT (Charge Assignment Mesh). */
p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim);
fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart);
p3m.fft->perform_fwd_fft();
}

auto p_q_range = ParticlePropertyRange::charge_range(particles);
Expand All @@ -496,45 +482,41 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
/* === k-space force calculation === */
if (force_flag) {
/* i*k differentiation */
auto constexpr m_start = Utils::Vector3i::broadcast(0);
auto const m_stop = std::span(p3m.fft.plan[3].new_mesh);
auto const offset = std::span(p3m.fft.plan[3].start);
auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
auto const &mesh_stop = p3m.mesh.size;
auto const &offset = p3m.mesh.start;
auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
auto indices = Utils::Vector3i{};
auto index = std::size_t(0u);

/* compute electric field */
// Eq. (3.49) @cite deserno00b
for_each_3d(m_start, m_stop, indices, [&]() {
auto const rho_hat = std::complex<double>(p3m.rs_mesh[2u * index + 0u],
p3m.rs_mesh[2u * index + 1u]);
for_each_3d(mesh_start, mesh_stop, indices, [&]() {
auto const rho_hat =
std::complex<double>(p3m.mesh.rs_scalar[2u * index + 0u],
p3m.mesh.rs_scalar[2u * index + 1u]);
auto const phi_hat = p3m.g_force[index] * rho_hat;

for (int d = 0; d < 3; d++) {
/* direction in r-space: */
int d_rs = (d + p3m.ks_pnum) % 3;
int d_rs = (d + p3m.mesh.ks_pnum) % 3;
/* directions */
auto const k =
p3m.d_op[d_rs][indices[d] + offset[d]] * wavevector[d_rs];

/* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */
p3m.E_mesh[d_rs][2u * index + 0u] = -k * phi_hat.imag();
p3m.E_mesh[d_rs][2u * index + 1u] = +k * phi_hat.real();
p3m.mesh.rs_fields[d_rs][2u * index + 0u] = -k * phi_hat.imag();
p3m.mesh.rs_fields[d_rs][2u * index + 1u] = +k * phi_hat.real();
}

++index;
});

/* Back FFT force component mesh */
auto const check_complex = !p3m.params.tuning and check_complex_residuals;
for (int d = 0; d < 3; d++) {
fft_perform_back(p3m.E_mesh[d].data(), check_complex, p3m.fft, comm_cart);
}

/* redistribute force component mesh */
std::array<double *, 3> E_fields = {
{p3m.E_mesh[0].data(), p3m.E_mesh[1].data(), p3m.E_mesh[2].data()}};
p3m.sm.spread_grid(E_fields, comm_cart, p3m.local_mesh.dim);
auto const check_residuals =
not p3m.params.tuning and check_complex_residuals;
p3m.fft->check_complex_residuals = check_residuals;
p3m.fft->perform_field_back_fft();
p3m.fft->check_complex_residuals = false;

auto const force_prefac = prefactor / volume;
Utils::integral_parameter<int, AssignForces, 1, 7>(
Expand All @@ -556,11 +538,13 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
/* === k-space energy calculation === */
if (energy_flag or npt_flag) {
auto node_energy = 0.;
for (int i = 0; i < p3m.fft.plan[3].new_size; i++) {
auto const mesh_length = Utils::product(p3m.mesh.size);
for (int i = 0; i < mesh_length; i++) {
// Use the energy optimized influence function for energy!
// Eq. (3.40) @cite deserno00b
node_energy += p3m.g_energy[i] * (Utils::sqr(p3m.rs_mesh[2 * i]) +
Utils::sqr(p3m.rs_mesh[2 * i + 1]));
node_energy +=
p3m.g_energy[i] * (Utils::sqr(p3m.mesh.rs_scalar[2 * i]) +
Utils::sqr(p3m.mesh.rs_scalar[2 * i + 1]));
}
node_energy /= 2. * volume;

Expand Down Expand Up @@ -596,7 +580,7 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
}

class CoulombTuningAlgorithm : public TuningAlgorithm {
p3m_data_struct &p3m;
decltype(CoulombP3M::p3m) &p3m;
double m_mesh_density_min = -1., m_mesh_density_max = -1.;
// indicates if mesh should be tuned
bool m_tune_mesh = false;
Expand All @@ -605,7 +589,7 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
P3MParameters &get_params() override { return p3m.params; }

public:
CoulombTuningAlgorithm(System::System &system, p3m_data_struct &input_p3m,
CoulombTuningAlgorithm(System::System &system, decltype(p3m) &input_p3m,
double prefactor, int timings)
: TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}

Expand Down
Loading