From 0a763c5c145fec4c898589dd948e5d7a689be8f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 13 Nov 2024 19:19:07 +0100 Subject: [PATCH 1/3] Improve LB GPU field accessors performance --- .../templates/FieldAccessors.tmpl.cu | 56 ++++++++---- .../templates/FieldAccessors.tmpl.cuh | 2 + .../templates/FieldAccessors.tmpl.h | 21 +++++ .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 4 +- .../FieldAccessorsDoublePrecision.h | 51 +++++++++++ .../FieldAccessorsDoublePrecisionCUDA.cu | 87 +++++++++++++------ .../FieldAccessorsDoublePrecisionCUDA.cuh | 1 + .../FieldAccessorsSinglePrecision.h | 51 +++++++++++ .../FieldAccessorsSinglePrecisionCUDA.cu | 87 +++++++++++++------ .../FieldAccessorsSinglePrecisionCUDA.cuh | 1 + testsuite/python/lb_pressure_tensor.py | 25 +++--- 11 files changed, 301 insertions(+), 85 deletions(-) diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu index c9f8ae4dc9..df0be6acad 100644 --- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu @@ -899,7 +899,7 @@ namespace Force { namespace MomentumDensity { // LCOV_EXCL_START - __global__ void kernel_sum( + __global__ void kernel_get( gpu::FieldAccessor< {{dtype}} > pdf, gpu::FieldAccessor< {{dtype}} > force, {{dtype}} * RESTRICT out ) @@ -914,7 +914,7 @@ namespace MomentumDensity {% endfor -%} {{momentum_density_getter | substitute_force_getter_cu | indent(8) }} {% for i in range(D) -%} - out[{{i}}u] += md_{{i}}; + out[{{i}}u] = md_{{i}}; {% endfor %} } } @@ -924,19 +924,22 @@ namespace MomentumDensity gpu::GPUField< {{dtype}} > const * pdf_field, gpu::GPUField< {{dtype}} > const * force_field ) { - thrust::device_vector< {{dtype}} > dev_data({{D}}u, {{dtype}} {0}); + auto const ci = pdf_field->xyzSize(); + thrust::device_vector< {{dtype}} > dev_data({{D}}u * ci.numCells()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { - Cell cell(x, y, z); - CellInterval ci ( cell, cell ); - auto kernel = gpu::make_kernel( kernel_sum ); - kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); - kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); - kernel.addParam( dev_data_ptr ); - kernel(); - }); + auto kernel = gpu::make_kernel( kernel_get ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( dev_data_ptr ); + kernel(); + std::vector< {{dtype}} > out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); Vector{{D}}< {{dtype}} > mom({{dtype}} {0}); - thrust::copy(dev_data.begin(), dev_data.begin() + {{D}}u, mom.data()); + for (auto i = uint_t{ 0u }; i < {{D}}u * ci.numCells(); i += {{D}}u) { + {% for j in range(D) -%} + mom[{{j}}u] += out[i + {{j}}u]; + {% endfor -%} + } return mom; } } // namespace MomentumDensity @@ -977,7 +980,7 @@ namespace PressureTensor Matrix{{D}}< {{dtype}} > out; thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; - } + } std::vector< {{dtype}} > get( gpu::GPUField< {{dtype}} > const * pdf_field, @@ -992,7 +995,30 @@ namespace PressureTensor std::vector< {{dtype}} > out(dev_data.size()); thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; - } + } + + Matrix{{D}}< {{dtype}} > reduce( + gpu::GPUField< {{dtype}} > const * pdf_field) + { + auto const ci = pdf_field->xyzSize(); + thrust::device_vector< {{dtype}} > dev_data({{D**2}}u * ci.numCells()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_get ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addParam( dev_data_ptr ); + kernel(); + std::vector< {{dtype}} > out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + Matrix{{D}}< {{dtype}} > pressureTensor({{dtype}} {0}); + for (auto i = uint_t{ 0u }; i < {{D**2}}u * ci.numCells(); i += {{D**2}}u) { + {% for i in range(D) -%} + {% for j in range(D) -%} + pressureTensor[{{i*D+j}}u] += out[i + {{i*D+j}}u]; + {% endfor %} + {% endfor %} + } + return pressureTensor; + } } // namespace PressureTensor diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh index 65f776abee..ef29b2b508 100644 --- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh @@ -213,6 +213,8 @@ namespace PressureTensor { std::vector< {{dtype}} > get( gpu::GPUField< {{dtype}} > const * pdf_field, CellInterval const & ci ); + Matrix{{D}}< {{dtype}} > + reduce( gpu::GPUField< {{dtype}} > const * pdf_field ); } // namespace PressureTensor } // namespace accessor diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h index d443243bba..bcdbb4fd22 100644 --- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h @@ -641,6 +641,27 @@ namespace PressureTensor } return out; } + + inline auto + reduce( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field) + { + Matrix{{D}}< {{dtype}} > pressureTensor({{dtype}} {0}); + WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { + const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{second_momentum_getter | indent(8) }} + + {% for i in range(D) -%} + {% for j in range(D) -%} + pressureTensor[{{i*D+j}}u] += p_{{i*D+j}}; + {% endfor %} + {% endfor %} + }); + return pressureTensor; + } } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 8986fed7b4..414433006f 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -1508,9 +1508,7 @@ class LBWalberlaImpl : public LBWalberlaBase { Matrix3 tensor(FloatType{0}); for (auto block = blocks->begin(); block != blocks->end(); ++block) { auto pdf_field = block->template getData(m_pdf_field_id); - WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { - tensor += lbm::accessor::PressureTensor::get(pdf_field, Cell{x, y, z}); - }); + tensor += lbm::accessor::PressureTensor::reduce(pdf_field); } auto const grid_size = get_lattice().get_grid_dimensions(); auto const number_of_nodes = Utils::product(grid_size); diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h index c73cb58c14..bff4efa0fc 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h @@ -1242,6 +1242,57 @@ inline auto get(GhostLayerField const *pdf_field, } return out; } + +inline auto reduce(GhostLayerField const *pdf_field) { + Matrix3 pressureTensor(double{0}); + WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { + const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const double p_0 = + f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9; + const double p_1 = -f_10 - f_7 + f_8 + f_9; + const double p_2 = -f_13 + f_14 + f_17 - f_18; + const double p_3 = -f_10 - f_7 + f_8 + f_9; + const double p_4 = + f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9; + const double p_5 = f_11 - f_12 - f_15 + f_16; + const double p_6 = -f_13 + f_14 + f_17 - f_18; + const double p_7 = f_11 - f_12 - f_15 + f_16; + const double p_8 = + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6; + + pressureTensor[0u] += p_0; + pressureTensor[1u] += p_1; + pressureTensor[2u] += p_2; + + pressureTensor[3u] += p_3; + pressureTensor[4u] += p_4; + pressureTensor[5u] += p_5; + + pressureTensor[6u] += p_6; + pressureTensor[7u] += p_7; + pressureTensor[8u] += p_8; + }); + return pressureTensor; +} } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu index 62dda87100..ee087416d9 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu @@ -756,19 +756,6 @@ double get( return rho; } -void set( - gpu::GPUField *pdf_field, - const double rho, - Cell const &cell) { - CellInterval ci(cell, cell); - thrust::device_vector dev_data(1u, rho); - auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set); - kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); - kernel.addParam(const_cast(dev_data_ptr)); - kernel(); -} - std::vector get( gpu::GPUField const *pdf_field, CellInterval const &ci) { @@ -783,6 +770,19 @@ std::vector get( return out; } +void set( + gpu::GPUField *pdf_field, + const double rho, + Cell const &cell) { + CellInterval ci(cell, cell); + thrust::device_vector dev_data(1u, rho); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + void set( gpu::GPUField *pdf_field, std::vector const &values, @@ -1048,7 +1048,7 @@ void set(gpu::GPUField const *pdf_field, namespace MomentumDensity { // LCOV_EXCL_START -__global__ void kernel_sum( +__global__ void kernel_get( gpu::FieldAccessor pdf, gpu::FieldAccessor force, double *RESTRICT out) { @@ -1086,9 +1086,9 @@ __global__ void kernel_sum( const double md_0 = force.get(0) * 0.50000000000000000 + momdensity_0; const double md_1 = force.get(1) * 0.50000000000000000 + momdensity_1; const double md_2 = force.get(2) * 0.50000000000000000 + momdensity_2; - out[0u] += md_0; - out[1u] += md_1; - out[2u] += md_2; + out[0u] = md_0; + out[1u] = md_1; + out[2u] = md_2; } } // LCOV_EXCL_STOP @@ -1096,19 +1096,22 @@ __global__ void kernel_sum( Vector3 reduce( gpu::GPUField const *pdf_field, gpu::GPUField const *force_field) { - thrust::device_vector dev_data(3u, double{0}); + auto const ci = pdf_field->xyzSize(); + thrust::device_vector dev_data(3u * ci.numCells()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { - Cell cell(x, y, z); - CellInterval ci(cell, cell); - auto kernel = gpu::make_kernel(kernel_sum); - kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); - kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); - kernel.addParam(dev_data_ptr); - kernel(); - }); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); Vector3 mom(double{0}); - thrust::copy(dev_data.begin(), dev_data.begin() + 3u, mom.data()); + for (auto i = uint_t{0u}; i < 3u * ci.numCells(); i += 3u) { + mom[0u] += out[i + 0u]; + mom[1u] += out[i + 1u]; + mom[2u] += out[i + 2u]; + } return mom; } } // namespace MomentumDensity @@ -1191,6 +1194,34 @@ std::vector get( thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; } + +Matrix3 reduce( + gpu::GPUField const *pdf_field) { + auto const ci = pdf_field->xyzSize(); + thrust::device_vector dev_data(9u * ci.numCells()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + Matrix3 pressureTensor(double{0}); + for (auto i = uint_t{0u}; i < 9u * ci.numCells(); i += 9u) { + pressureTensor[0u] += out[i + 0u]; + pressureTensor[1u] += out[i + 1u]; + pressureTensor[2u] += out[i + 2u]; + + pressureTensor[3u] += out[i + 3u]; + pressureTensor[4u] += out[i + 4u]; + pressureTensor[5u] += out[i + 5u]; + + pressureTensor[6u] += out[i + 6u]; + pressureTensor[7u] += out[i + 7u]; + pressureTensor[8u] += out[i + 8u]; + } + return pressureTensor; +} } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh index 6ac754f263..7435c52b71 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh @@ -163,6 +163,7 @@ namespace PressureTensor { Matrix3 get(gpu::GPUField const *pdf_field, Cell const &cell); std::vector get(gpu::GPUField const *pdf_field, CellInterval const &ci); +Matrix3 reduce(gpu::GPUField const *pdf_field); } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h index a7711c4f78..11043cefb8 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h @@ -1244,6 +1244,57 @@ inline auto get(GhostLayerField const *pdf_field, } return out; } + +inline auto reduce(GhostLayerField const *pdf_field) { + Matrix3 pressureTensor(float{0}); + WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { + const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const float p_0 = + f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9; + const float p_1 = -f_10 - f_7 + f_8 + f_9; + const float p_2 = -f_13 + f_14 + f_17 - f_18; + const float p_3 = -f_10 - f_7 + f_8 + f_9; + const float p_4 = + f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9; + const float p_5 = f_11 - f_12 - f_15 + f_16; + const float p_6 = -f_13 + f_14 + f_17 - f_18; + const float p_7 = f_11 - f_12 - f_15 + f_16; + const float p_8 = + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6; + + pressureTensor[0u] += p_0; + pressureTensor[1u] += p_1; + pressureTensor[2u] += p_2; + + pressureTensor[3u] += p_3; + pressureTensor[4u] += p_4; + pressureTensor[5u] += p_5; + + pressureTensor[6u] += p_6; + pressureTensor[7u] += p_7; + pressureTensor[8u] += p_8; + }); + return pressureTensor; +} } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu index e0c8dd8102..9631c3e844 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu @@ -756,19 +756,6 @@ float get( return rho; } -void set( - gpu::GPUField *pdf_field, - const float rho, - Cell const &cell) { - CellInterval ci(cell, cell); - thrust::device_vector dev_data(1u, rho); - auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set); - kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); - kernel.addParam(const_cast(dev_data_ptr)); - kernel(); -} - std::vector get( gpu::GPUField const *pdf_field, CellInterval const &ci) { @@ -783,6 +770,19 @@ std::vector get( return out; } +void set( + gpu::GPUField *pdf_field, + const float rho, + Cell const &cell) { + CellInterval ci(cell, cell); + thrust::device_vector dev_data(1u, rho); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + void set( gpu::GPUField *pdf_field, std::vector const &values, @@ -1048,7 +1048,7 @@ void set(gpu::GPUField const *pdf_field, namespace MomentumDensity { // LCOV_EXCL_START -__global__ void kernel_sum( +__global__ void kernel_get( gpu::FieldAccessor pdf, gpu::FieldAccessor force, float *RESTRICT out) { @@ -1086,9 +1086,9 @@ __global__ void kernel_sum( const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0; const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1; const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2; - out[0u] += md_0; - out[1u] += md_1; - out[2u] += md_2; + out[0u] = md_0; + out[1u] = md_1; + out[2u] = md_2; } } // LCOV_EXCL_STOP @@ -1096,19 +1096,22 @@ __global__ void kernel_sum( Vector3 reduce( gpu::GPUField const *pdf_field, gpu::GPUField const *force_field) { - thrust::device_vector dev_data(3u, float{0}); + auto const ci = pdf_field->xyzSize(); + thrust::device_vector dev_data(3u * ci.numCells()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { - Cell cell(x, y, z); - CellInterval ci(cell, cell); - auto kernel = gpu::make_kernel(kernel_sum); - kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); - kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); - kernel.addParam(dev_data_ptr); - kernel(); - }); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); Vector3 mom(float{0}); - thrust::copy(dev_data.begin(), dev_data.begin() + 3u, mom.data()); + for (auto i = uint_t{0u}; i < 3u * ci.numCells(); i += 3u) { + mom[0u] += out[i + 0u]; + mom[1u] += out[i + 1u]; + mom[2u] += out[i + 2u]; + } return mom; } } // namespace MomentumDensity @@ -1191,6 +1194,34 @@ std::vector get( thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; } + +Matrix3 reduce( + gpu::GPUField const *pdf_field) { + auto const ci = pdf_field->xyzSize(); + thrust::device_vector dev_data(9u * ci.numCells()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + Matrix3 pressureTensor(float{0}); + for (auto i = uint_t{0u}; i < 9u * ci.numCells(); i += 9u) { + pressureTensor[0u] += out[i + 0u]; + pressureTensor[1u] += out[i + 1u]; + pressureTensor[2u] += out[i + 2u]; + + pressureTensor[3u] += out[i + 3u]; + pressureTensor[4u] += out[i + 4u]; + pressureTensor[5u] += out[i + 5u]; + + pressureTensor[6u] += out[i + 6u]; + pressureTensor[7u] += out[i + 7u]; + pressureTensor[8u] += out[i + 8u]; + } + return pressureTensor; +} } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh index 9d25a37a85..fd12e2e019 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh @@ -159,6 +159,7 @@ namespace PressureTensor { Matrix3 get(gpu::GPUField const *pdf_field, Cell const &cell); std::vector get(gpu::GPUField const *pdf_field, CellInterval const &ci); +Matrix3 reduce(gpu::GPUField const *pdf_field); } // namespace PressureTensor } // namespace accessor diff --git a/testsuite/python/lb_pressure_tensor.py b/testsuite/python/lb_pressure_tensor.py index 59ff0f2b5d..d4fb48b231 100644 --- a/testsuite/python/lb_pressure_tensor.py +++ b/testsuite/python/lb_pressure_tensor.py @@ -43,19 +43,22 @@ class TestLBPressureTensor: system.time_step = params["tau"] system.cell_system.skin = 0 - def tearDown(self): - self.system.lb = None - self.system.thermostat.turn_off() + @classmethod + def tearDownClass(cls): + cls.system.lb = None + cls.system.thermostat.turn_off() - def setUp(self): + @classmethod + def setUpClass(cls): # Setup - self.lbf = self.lb_class(**self.params, **self.lb_params) - self.system.lb = self.lbf - self.system.thermostat.set_lb(LB_fluid=self.lbf, seed=42) + cls.lbf = cls.lb_class(**cls.params, **cls.lb_params) + cls.system.lb = cls.lbf # Warmup - self.system.integrator.run(500) + cls.system.integrator.run(500) + cls.sample_pressure(cls) + def sample_pressure(self): # Sampling self.p_global = np.zeros((self.steps, 3, 3)) self.p_node0 = np.zeros((self.steps, 3, 3)) @@ -188,10 +191,10 @@ def test_gk_viscosity(self): numeric_integral = np.trapz(acf[:len(ts)], dx=2 * self.params["tau"]) # fit tail - def f(x, a, b): return a * np.exp(-b * x) + def fit(x, a, b): return a * np.exp(-b * x) - (a, b), _ = scipy.optimize.curve_fit(f, acf[:len(ts)], ts) - tail = f(ts[-1], a, b) / b + (a, b), _ = scipy.optimize.curve_fit(fit, acf[:len(ts)], ts) + tail = fit(ts[-1], a, b) / b integral = numeric_integral + tail From dceb1d00a2c4793f1a229a2e8d636eac1d02fd0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 14 Nov 2024 23:24:28 +0100 Subject: [PATCH 2/3] Implement LB GPU VTK kernels --- .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 105 +++++++++++++----- testsuite/python/CMakeLists.txt | 4 +- testsuite/python/lattice_vtk.py | 48 +++++--- testsuite/python/save_checkpoint.py | 29 +++-- testsuite/python/test_checkpoint.py | 1 - 5 files changed, 127 insertions(+), 60 deletions(-) diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 414433006f..3c1b11219f 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -40,6 +40,7 @@ #include #if defined(__CUDACC__) #include +#include #include #include #endif @@ -173,6 +174,10 @@ class LBWalberlaImpl : public LBWalberlaBase { using FlagField = typename BoundaryModel::FlagField; #if defined(__CUDACC__) using GPUField = gpu::GPUField; + using PdfFieldCpu = + typename FieldTrait::PdfField; + using VectorFieldCpu = + typename FieldTrait::VectorField; #endif struct GhostComm { @@ -302,7 +307,12 @@ class LBWalberlaImpl : public LBWalberlaBase { BlockDataID m_force_to_be_applied_id; BlockDataID m_velocity_field_id; - BlockDataID m_vec_tmp_field_id; + BlockDataID m_vel_tmp_field_id; + +#if defined(__CUDACC__) + std::optional m_pdf_cpu_field_id; + std::optional m_vel_cpu_field_id; +#endif /** Flag for boundary cells. */ FlagUID const Boundary_flag{"boundary"}; @@ -364,6 +374,10 @@ class LBWalberlaImpl : public LBWalberlaBase { // lattice std::shared_ptr m_lattice; +#if defined(__CUDACC__) + std::shared_ptr> m_host_field_allocator; +#endif + [[nodiscard]] std::optional get_interval(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner) const { @@ -474,7 +488,11 @@ class LBWalberlaImpl : public LBWalberlaBase { m_last_applied_force_field_id = add_to_storage<_VectorField>("force last"); m_force_to_be_applied_id = add_to_storage<_VectorField>("force next"); m_velocity_field_id = add_to_storage<_VectorField>("velocity"); - m_vec_tmp_field_id = add_to_storage<_VectorField>("velocity_tmp"); + m_vel_tmp_field_id = add_to_storage<_VectorField>("velocity_tmp"); +#if defined(__CUDACC__) + m_host_field_allocator = + std::make_shared>(); +#endif // Initialize and register pdf field auto pdf_setter = @@ -768,13 +786,13 @@ class LBWalberlaImpl : public LBWalberlaBase { m_lees_edwards_callbacks->get_pos_offset); m_lees_edwards_vel_interpol_sweep = std::make_shared< InterpolateAndShiftAtBoundary<_VectorField, FloatType>>( - blocks, m_velocity_field_id, m_vec_tmp_field_id, n_ghost_layers, + blocks, m_velocity_field_id, m_vel_tmp_field_id, n_ghost_layers, shear_direction, shear_plane_normal, m_lees_edwards_callbacks->get_pos_offset, m_lees_edwards_callbacks->get_shear_velocity); m_lees_edwards_last_applied_force_interpol_sweep = std::make_shared< InterpolateAndShiftAtBoundary<_VectorField, FloatType>>( - blocks, m_last_applied_force_field_id, m_vec_tmp_field_id, + blocks, m_last_applied_force_field_id, m_vel_tmp_field_id, n_ghost_layers, shear_direction, shear_plane_normal, m_lees_edwards_callbacks->get_pos_offset); setup_streaming_communicator(); @@ -1577,13 +1595,9 @@ class LBWalberlaImpl : public LBWalberlaBase { } void register_vtk_field_filters(walberla::vtk::VTKOutput &vtk_obj) override { - if constexpr (Architecture == lbmpy::Arch::GPU) { - throw std::runtime_error("VTK output not supported for GPU"); - } else { - field::FlagFieldCellFilter fluid_filter(m_flag_field_id); - fluid_filter.addFlag(Boundary_flag); - vtk_obj.addCellExclusionFilter(fluid_filter); - } + field::FlagFieldCellFilter fluid_filter(m_flag_field_id); + fluid_filter.addFlag(Boundary_flag); + vtk_obj.addCellExclusionFilter(fluid_filter); } protected: @@ -1672,26 +1686,61 @@ class LBWalberlaImpl : public LBWalberlaBase { void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj, LatticeModel::units_map const &units, int flag_observables) override { - if constexpr (Architecture == lbmpy::Arch::GPU) { - throw std::runtime_error("VTK output not supported for GPU"); - } else { - if (flag_observables & static_cast(OutputVTK::density)) { - auto const unit_conversion = FloatType_c(units.at("density")); - vtk_obj.addCellDataWriter(std::make_shared>( - m_pdf_field_id, "density", unit_conversion)); +#if defined(__CUDACC__) + auto const allocate_cpu_field_if_empty = + [&](auto const &blocks, std::string name, + std::optional &cpu_field) { + if (not cpu_field) { + cpu_field = field::addToStorage( + blocks, name, FloatType{0}, field::fzyx, + m_lattice->get_ghost_layers(), m_host_field_allocator); + } + }; +#endif + if (flag_observables & static_cast(OutputVTK::density)) { + auto const unit_conversion = FloatType_c(units.at("density")); +#if defined(__CUDACC__) + if constexpr (Architecture == lbmpy::Arch::GPU) { + auto const &blocks = m_lattice->get_blocks(); + allocate_cpu_field_if_empty.template operator()( + blocks, "pdfs_cpu", m_pdf_cpu_field_id); + vtk_obj.addBeforeFunction(gpu::fieldCpyFunctor( + blocks, *m_pdf_cpu_field_id, m_pdf_field_id)); } - if (flag_observables & static_cast(OutputVTK::velocity_vector)) { - auto const unit_conversion = FloatType_c(units.at("velocity")); - vtk_obj.addCellDataWriter(std::make_shared>( - m_velocity_field_id, "velocity_vector", unit_conversion)); +#endif + vtk_obj.addCellDataWriter(std::make_shared>( + m_pdf_field_id, "density", unit_conversion)); + } + if (flag_observables & static_cast(OutputVTK::velocity_vector)) { + auto const unit_conversion = FloatType_c(units.at("velocity")); +#if defined(__CUDACC__) + if constexpr (Architecture == lbmpy::Arch::GPU) { + auto const &blocks = m_lattice->get_blocks(); + allocate_cpu_field_if_empty.template operator()( + blocks, "vel_cpu", m_vel_cpu_field_id); + vtk_obj.addBeforeFunction( + gpu::fieldCpyFunctor( + blocks, *m_vel_cpu_field_id, m_velocity_field_id)); } - if (flag_observables & static_cast(OutputVTK::pressure_tensor)) { - auto const unit_conversion = FloatType_c(units.at("pressure")); - vtk_obj.addCellDataWriter( - std::make_shared>( - m_pdf_field_id, "pressure_tensor", unit_conversion, - pressure_tensor_correction_factor())); +#endif + vtk_obj.addCellDataWriter(std::make_shared>( + m_velocity_field_id, "velocity_vector", unit_conversion)); + } + if (flag_observables & static_cast(OutputVTK::pressure_tensor)) { + auto const unit_conversion = FloatType_c(units.at("pressure")); +#if defined(__CUDACC__) + if constexpr (Architecture == lbmpy::Arch::GPU) { + auto const &blocks = m_lattice->get_blocks(); + allocate_cpu_field_if_empty.template operator()( + blocks, "pdfs_cpu", m_pdf_cpu_field_id); + vtk_obj.addBeforeFunction(gpu::fieldCpyFunctor( + blocks, *m_pdf_cpu_field_id, m_pdf_field_id)); } +#endif + vtk_obj.addCellDataWriter( + std::make_shared>( + m_pdf_field_id, "pressure_tensor", unit_conversion, + pressure_tensor_correction_factor())); } } diff --git a/testsuite/python/CMakeLists.txt b/testsuite/python/CMakeLists.txt index ee90256b07..12b21f8914 100644 --- a/testsuite/python/CMakeLists.txt +++ b/testsuite/python/CMakeLists.txt @@ -388,9 +388,9 @@ python_test(FILE utils.py MAX_NUM_PROC 1) python_test(FILE npt_thermostat.py MAX_NUM_PROC 4) python_test(FILE box_geometry.py MAX_NUM_PROC 1) python_test(FILE lattice.py MAX_NUM_PROC 4) -python_test(FILE lattice_vtk.py MAX_NUM_PROC 4) +python_test(FILE lattice_vtk.py MAX_NUM_PROC 4 GPU_SLOTS 1) if(${ESPRESSO_TEST_NP} GREATER_EQUAL 6) - python_test(FILE lattice_vtk.py MAX_NUM_PROC 6 SUFFIX 6_cores) + python_test(FILE lattice_vtk.py MAX_NUM_PROC 6 SUFFIX 6_cores GPU_SLOTS 1) endif() python_test(FILE ek_interface.py MAX_NUM_PROC 2) python_test(FILE ek_diffusion.py MAX_NUM_PROC 1) diff --git a/testsuite/python/lattice_vtk.py b/testsuite/python/lattice_vtk.py index 9b4e1981a1..d4420d8cbf 100644 --- a/testsuite/python/lattice_vtk.py +++ b/testsuite/python/lattice_vtk.py @@ -36,7 +36,7 @@ class TestVTK: - system = espressomd.System(box_l=[6, 7, 5]) + system = espressomd.System(box_l=[6, 7, 3]) system.time_step = 0.1 system.cell_system.skin = 0.4 @@ -135,7 +135,7 @@ def test_vtk(self): actor.add_boundary_from_shape( espressomd.shapes.Wall(normal=[-1, 0, 0], dist=-(self.system.box_l[0] - dist))) - n_steps = 10 + n_steps = 4 if self.lb_class is espressomd.lb.LBFluidWalberlaGPU else 10 shape = tuple(actor.shape) shape = (shape[0] - 4, *shape[1:]) vtk_reader = espressomd.io.vtk.VTKReader() @@ -329,42 +329,62 @@ def test_vtk(self): self.assertEqual(len(actor.vtk_writers), 0) -@utx.skipIfMissingFeatures("WALBERLA") -class LBWalberlaWrite(TestLBVTK, ut.TestCase): +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBWalberlaVTKDoublePrecisionCPU(TestLBVTK, ut.TestCase): vtk_class = espressomd.lb.VTKOutput lattice_class = espressomd.lb.LatticeWalberla lb_class = espressomd.lb.LBFluidWalberla lb_params = {"single_precision": False} - vtk_id = "lb_double_precision" + vtk_id = "lb_double_precision_cpu" -@utx.skipIfMissingFeatures("WALBERLA") -class LBWalberlaWriteSinglePrecision(TestLBVTK, ut.TestCase): +@utx.skipIfMissingGPU() +@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"]) +class LBWalberlaVTKDoublePrecisionGPU(TestLBVTK, ut.TestCase): + vtk_class = espressomd.lb.VTKOutput + lattice_class = espressomd.lb.LatticeWalberla + lb_class = espressomd.lb.LBFluidWalberlaGPU + lb_params = {"single_precision": False} + vtk_id = "lb_double_precision_gpu" + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBWalberlaVTKSinglePrecisionCPU(TestLBVTK, ut.TestCase): vtk_class = espressomd.lb.VTKOutput lattice_class = espressomd.lb.LatticeWalberla lb_class = espressomd.lb.LBFluidWalberla lb_params = {"single_precision": True} - vtk_id = "lb_single_precision" + vtk_id = "lb_single_precision_cpu" + + +@utx.skipIfMissingGPU() +@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"]) +class LBWalberlaVTKSinglePrecisionGPU(TestLBVTK, ut.TestCase): + vtk_class = espressomd.lb.VTKOutput + lattice_class = espressomd.lb.LatticeWalberla + lb_class = espressomd.lb.LBFluidWalberlaGPU + lb_params = {"single_precision": True} + vtk_id = "lb_single_precision_gpu" -@utx.skipIfMissingFeatures("WALBERLA") -class EKWalberlaWrite(TestEKVTK, ut.TestCase): +@utx.skipIfMissingFeatures(["WALBERLA"]) +class EKWalberlaVTKDoublePrecisionCPU(TestEKVTK, ut.TestCase): vtk_class = espressomd.electrokinetics.VTKOutput lattice_class = espressomd.electrokinetics.LatticeWalberla ek_class = espressomd.electrokinetics.EKSpecies ek_solver = espressomd.electrokinetics.EKNone ek_params = {"single_precision": False} - vtk_id = "ek_double_precision" + vtk_id = "ek_double_precision_cpu" -@utx.skipIfMissingFeatures("WALBERLA") -class EKWalberlaWriteSinglePrecision(TestEKVTK, ut.TestCase): +@utx.skipIfMissingFeatures(["WALBERLA"]) +class EKWalberlaVTKSinglePrecisionCPU(TestEKVTK, ut.TestCase): vtk_class = espressomd.electrokinetics.VTKOutput lattice_class = espressomd.electrokinetics.LatticeWalberla ek_class = espressomd.electrokinetics.EKSpecies ek_solver = espressomd.electrokinetics.EKNone ek_params = {"single_precision": True} - vtk_id = "ek_single_precision" + vtk_id = "ek_single_precision_cpu" if __name__ == "__main__": diff --git a/testsuite/python/save_checkpoint.py b/testsuite/python/save_checkpoint.py index 29e8d387de..28c5950a3c 100644 --- a/testsuite/python/save_checkpoint.py +++ b/testsuite/python/save_checkpoint.py @@ -405,21 +405,20 @@ vtk_suffix = config.test_name vtk_root = pathlib.Path("vtk_out") # create LB VTK callbacks - if 'LB.GPU' not in modes: # TODO WALBERLA - lb_vtk_auto_id = f"auto_lb_{vtk_suffix}" - lb_vtk_manual_id = f"manual_lb_{vtk_suffix}" - config.recursive_unlink(vtk_root / lb_vtk_auto_id) - config.recursive_unlink(vtk_root / lb_vtk_manual_id) - lb_vtk_auto = espressomd.lb.VTKOutput( - identifier=lb_vtk_auto_id, delta_N=1, - observables=('density', 'velocity_vector'), base_folder=str(vtk_root)) - lbf.add_vtk_writer(vtk=lb_vtk_auto) - lb_vtk_auto.disable() - lb_vtk_manual = espressomd.lb.VTKOutput( - identifier=lb_vtk_manual_id, delta_N=0, - observables=('density',), base_folder=str(vtk_root)) - lbf.add_vtk_writer(vtk=lb_vtk_manual) - lb_vtk_manual.write() + lb_vtk_auto_id = f"auto_lb_{vtk_suffix}" + lb_vtk_manual_id = f"manual_lb_{vtk_suffix}" + config.recursive_unlink(vtk_root / lb_vtk_auto_id) + config.recursive_unlink(vtk_root / lb_vtk_manual_id) + lb_vtk_auto = espressomd.lb.VTKOutput( + identifier=lb_vtk_auto_id, delta_N=1, + observables=('density', 'velocity_vector'), base_folder=str(vtk_root)) + lbf.add_vtk_writer(vtk=lb_vtk_auto) + lb_vtk_auto.disable() + lb_vtk_manual = espressomd.lb.VTKOutput( + identifier=lb_vtk_manual_id, delta_N=0, + observables=('density',), base_folder=str(vtk_root)) + lbf.add_vtk_writer(vtk=lb_vtk_manual) + lb_vtk_manual.write() # create EK VTK callbacks ek_vtk_auto_id = f"auto_ek_{vtk_suffix}" ek_vtk_manual_id = f"manual_ek_{vtk_suffix}" diff --git a/testsuite/python/test_checkpoint.py b/testsuite/python/test_checkpoint.py index 5f4b5f63f0..ed31ebc8d6 100644 --- a/testsuite/python/test_checkpoint.py +++ b/testsuite/python/test_checkpoint.py @@ -269,7 +269,6 @@ def generator(value, shape): np.copy(ek_species[:, :, :].is_boundary), False) @utx.skipIfMissingFeatures(["WALBERLA"]) - @ut.skipIf('LB.GPU' in modes, 'VTK not implemented for LB GPU') @ut.skipIf(not has_lb_mode, "Skipping test due to missing LB mode.") def test_lb_vtk(self): lbf = system.lb From 0a6d1bdec7bde5bfee0e775535883a667fd4ae62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Fri, 15 Nov 2024 18:53:37 +0100 Subject: [PATCH 3/3] Fix thermostats Properly handle null values for kT and friction coefficients. Make friction coefficients required parameters. --- .../electrokinetics/electrokinetics.ipynb | 2 +- samples/lb_profile.py | 1 - src/core/lb/particle_coupling.cpp | 27 ++++--- src/core/thermostat.hpp | 3 + .../thermostat/thermostat.hpp | 70 +++++++++++++++---- testsuite/python/lb.py | 5 +- testsuite/python/lb_interpolation.py | 39 ++++++----- testsuite/python/lb_pressure_tensor.py | 1 - testsuite/python/lb_thermostat.py | 2 + 9 files changed, 104 insertions(+), 46 deletions(-) diff --git a/doc/tutorials/electrokinetics/electrokinetics.ipynb b/doc/tutorials/electrokinetics/electrokinetics.ipynb index 32e5f952b8..3fe674686e 100644 --- a/doc/tutorials/electrokinetics/electrokinetics.ipynb +++ b/doc/tutorials/electrokinetics/electrokinetics.ipynb @@ -1042,7 +1042,7 @@ " lattice=lattice, density=DENSITY_FLUID, kinematic_viscosity=VISCOSITY_KINEMATIC,\n", " tau=TAU, ext_force_density=EXT_FORCE_DENSITY, kT=KT, seed=42)\n", "system.lb = lbf\n", - "system.thermostat.set_lb(LB_fluid=lbf, seed=42)\n", + "system.thermostat.set_lb(LB_fluid=lbf, seed=42, gamma=0.)\n", "eksolver = espressomd.electrokinetics.EKNone(lattice=lattice)\n", "system.ekcontainer = espressomd.electrokinetics.EKContainer(tau=TAU, solver=eksolver)" ] diff --git a/samples/lb_profile.py b/samples/lb_profile.py index 763bd7f95a..6b47778f29 100644 --- a/samples/lb_profile.py +++ b/samples/lb_profile.py @@ -45,7 +45,6 @@ agrid=1.0, density=1.0, kinematic_viscosity=1.0, tau=0.01, ext_force_density=[0, 0, 0.15], kT=0.0) system.lb = lb_fluid -system.thermostat.set_lb(LB_fluid=lb_fluid, seed=23) ctp = espressomd.math.CylindricalTransformationParameters( center=[5.0, 5.0, 0.0], axis=[0, 0, 1], diff --git a/src/core/lb/particle_coupling.cpp b/src/core/lb/particle_coupling.cpp index 648384f1f6..fad6bb64b8 100644 --- a/src/core/lb/particle_coupling.cpp +++ b/src/core/lb/particle_coupling.cpp @@ -291,18 +291,23 @@ void ParticleCoupling::kernel(std::vector const &particles) { #endif Utils::Vector3d force_on_particle = {}; if (coupling_mode == particle_force) { - auto &v_fluid = *it_interpolated_velocities; - if (m_box_geo.type() == BoxType::LEES_EDWARDS) { - // Account for the case where the interpolated velocity has been read - // from a ghost of the particle across the LE boundary (or vice verssa) - // Then the particle velocity is shifted by +,- the LE shear velocity - auto const vel_correction = lees_edwards_vel_shift( - *it_positions_velocity_coupling, p.pos(), m_box_geo); - v_fluid += vel_correction; +#ifndef THERMOSTAT_PER_PARTICLE + if (m_thermostat.gamma > 0.) +#endif + { + auto &v_fluid = *it_interpolated_velocities; + if (m_box_geo.type() == BoxType::LEES_EDWARDS) { + // Account for the case where the interpolated velocity has been read + // from a ghost of the particle across the LE boundary (or vice versa) + // Then the particle velocity is shifted by +,- the LE shear velocity + auto const vel_correction = lees_edwards_vel_shift( + *it_positions_velocity_coupling, p.pos(), m_box_geo); + v_fluid += vel_correction; + } + auto const drag_force = lb_drag_force(p, m_thermostat.gamma, v_fluid); + auto const random_force = get_noise_term(p); + force_on_particle = drag_force + random_force; } - auto const drag_force = lb_drag_force(p, m_thermostat.gamma, v_fluid); - auto const random_force = get_noise_term(p); - force_on_particle = drag_force + random_force; ++it_interpolated_velocities; ++it_positions_velocity_coupling; } diff --git a/src/core/thermostat.hpp b/src/core/thermostat.hpp index 121c6a4ebd..8d5781257c 100644 --- a/src/core/thermostat.hpp +++ b/src/core/thermostat.hpp @@ -97,6 +97,9 @@ inline bool are_kT_equal(double old_kT, double new_kT) { } auto const large_kT = (old_kT > new_kT) ? old_kT : new_kT; auto const small_kT = (old_kT > new_kT) ? new_kT : old_kT; + if (small_kT == 0.) { + return false; + } return (large_kT / small_kT - 1. < relative_tolerance); } } // namespace Thermostat diff --git a/src/script_interface/thermostat/thermostat.hpp b/src/script_interface/thermostat/thermostat.hpp index 57318778f1..bb63ad38a3 100644 --- a/src/script_interface/thermostat/thermostat.hpp +++ b/src/script_interface/thermostat/thermostat.hpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -92,7 +93,7 @@ class Interface : public AutoParameters, System::Leaf> { } virtual bool invalid_rng_state(VariantMap const ¶ms) const { - return (not params.count("seed") or is_none(params.at("seed"))) and + return (not params.contains("seed") or is_none(params.at("seed"))) and is_seed_required(); } @@ -101,14 +102,15 @@ class Interface : public AutoParameters, System::Leaf> { get_member_handle(::Thermostat::Thermostat &thermostat) = 0; void set_new_parameters(VariantMap const ¶ms) { - if (params.count("__check_rng_state") and invalid_rng_state(params)) { - context()->parallel_try_catch([]() { + context()->parallel_try_catch([&]() { + if (params.contains("__check_rng_state") and invalid_rng_state(params)) { throw std::invalid_argument("Parameter 'seed' is needed on first " "activation of the thermostat"); - }); - } + } + check_required_parameters(params); + }); for (auto const &key : get_parameter_insertion_order()) { - if (params.count(key)) { + if (params.contains(key)) { auto const &v = params.at(key); if (key == "is_active") { is_active = get_value(v); @@ -119,6 +121,17 @@ class Interface : public AutoParameters, System::Leaf> { } } + virtual std::span get_required_parameters() const = 0; + + void check_required_parameters(VariantMap const ¶ms) const { + for (auto const &required : get_required_parameters()) { + auto name = std::string(required); + if (not params.contains(name)) { + throw std::runtime_error("Parameter '" + name + "' is missing"); + } + } + } + protected: template auto make_autoparameter(T CoreThermostat::*member, char const *name) { @@ -169,7 +182,7 @@ class Interface : public AutoParameters, System::Leaf> { auto params = parameters; if (not is_seed_required()) { for (auto key : {std::string("seed"), std::string("philox_counter")}) { - if (params.count(key) == 0ul) { + if (not params.contains(key)) { params[key] = get_parameter(key); } } @@ -216,7 +229,7 @@ class Interface : public AutoParameters, System::Leaf> { } virtual std::optional extract_kT(VariantMap const ¶ms) const { - if (params.count("kT")) { + if (params.contains("kT")) { auto const value = get_value(params, "kT"); sanity_checks_positive(value, "kT"); return value; @@ -301,6 +314,11 @@ class Langevin : public Interface<::LangevinThermostat> { return thermostat.langevin; } + std::span get_required_parameters() const override { + static constexpr std::array names{std::string_view("gamma")}; + return names; + } + public: Langevin() { add_parameters({ @@ -319,7 +337,7 @@ class Langevin : public Interface<::LangevinThermostat> { Interface<::LangevinThermostat>::extend_parameters(parameters); #ifdef ROTATION // If gamma_rotation is not set explicitly, use the translational one. - if (params.count("gamma_rotation") == 0ul and params.count("gamma")) { + if (not params.contains("gamma_rotation") and params.contains("gamma")) { params["gamma_rotation"] = params.at("gamma"); } #endif // ROTATION @@ -333,6 +351,11 @@ class Brownian : public Interface<::BrownianThermostat> { return thermostat.brownian; } + std::span get_required_parameters() const override { + static constexpr std::array names{std::string_view("gamma")}; + return names; + } + public: Brownian() { add_parameters({ @@ -351,7 +374,7 @@ class Brownian : public Interface<::BrownianThermostat> { Interface<::BrownianThermostat>::extend_parameters(parameters); #ifdef ROTATION // If gamma_rotation is not set explicitly, use the translational one. - if (params.count("gamma_rotation") == 0ul and params.count("gamma")) { + if (not params.contains("gamma_rotation") and params.contains("gamma")) { params["gamma_rotation"] = params.at("gamma"); } #endif // ROTATION @@ -366,6 +389,12 @@ class IsotropicNpt : public Interface<::IsotropicNptThermostat> { return thermostat.npt_iso; } + std::span get_required_parameters() const override { + static constexpr std::array names{std::string_view("gamma0"), + std::string_view("gammav")}; + return names; + } + public: IsotropicNpt() { add_parameters({ @@ -418,10 +447,15 @@ class LBThermostat : public Interface<::LBThermostat> { protected: bool invalid_rng_state(VariantMap const ¶ms) const override { - return (not params.count("seed") or is_none(params.at("seed"))) and - params.count("__global_kT") and is_seed_required() and + return (not params.contains("seed") or is_none(params.at("seed"))) and + params.contains("__global_kT") and is_seed_required() and get_value(params, "__global_kT") != 0.; } + + std::span get_required_parameters() const override { + static constexpr std::array names{std::string_view("gamma")}; + return names; + } }; #endif // WALBERLA @@ -432,6 +466,10 @@ class DPDThermostat : public Interface<::DPDThermostat> { return thermostat.dpd; } + std::span get_required_parameters() const override { + return {}; + } + public: ::ThermostatFlags get_thermo_flag() const final { return THERMO_DPD; } }; @@ -444,6 +482,10 @@ class Stokesian : public Interface<::StokesianThermostat> { return thermostat.stokesian; } + std::span get_required_parameters() const override { + return {}; + } + public: ::ThermostatFlags get_thermo_flag() const final { return THERMO_SD; } }; @@ -455,6 +497,10 @@ class ThermalizedBond : public Interface<::ThermalizedBondThermostat> { return thermostat.thermalized_bond; } + std::span get_required_parameters() const override { + return {}; + } + public: ::ThermostatFlags get_thermo_flag() const final { return THERMO_BOND; } }; diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py index 4e585d5f08..a16ff6c5a3 100644 --- a/testsuite/python/lb.py +++ b/testsuite/python/lb.py @@ -54,7 +54,6 @@ class LBTest: system.cell_system.skin = 1.0 if espressomd.gpu_available(): system.cuda_init_handle.call_method("set_device_id_per_rank") - interpolation = False n_nodes = system.cell_system.get_state()["n_nodes"] def setUp(self): @@ -75,7 +74,6 @@ def test_properties(self): # activated actor lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params) self.system.lb = lbf - self.system.thermostat.set_lb(LB_fluid=lbf, seed=1) self.assertTrue(lbf.is_active) self.check_properties(lbf) self.system.lb = None @@ -83,7 +81,6 @@ def test_properties(self): # deactivated actor lbf = self.lb_class(kT=1.0, seed=42, **self.params, **self.lb_params) self.system.lb = lbf - self.system.thermostat.set_lb(LB_fluid=lbf, seed=1) self.system.lb = None self.assertFalse(lbf.is_active) self.check_properties(lbf) @@ -383,7 +380,7 @@ def test_pressure_tensor_observable(self): lbf = self.lb_class(kT=1., seed=1, ext_force_density=[0, 0, 0], **self.params, **self.lb_params) system.lb = lbf - system.thermostat.set_lb(LB_fluid=lbf, seed=1) + system.thermostat.set_lb(LB_fluid=lbf, seed=1, gamma=1.) system.integrator.run(10) pressure_tensor = np.copy( np.mean(lbf[:, :, :].pressure_tensor, axis=(0, 1, 2))) diff --git a/testsuite/python/lb_interpolation.py b/testsuite/python/lb_interpolation.py index 96e93523b5..3a2bd9a491 100644 --- a/testsuite/python/lb_interpolation.py +++ b/testsuite/python/lb_interpolation.py @@ -128,26 +128,33 @@ def test_mach_limit_check(self): self.assertIsNone(self.lbf[0, 0, 0].boundary) def test_interpolated_force(self): + def sample(kernel): + for i, j, k in itertools.product(range(3), range(3), range(3)): + system.lb[:, :, :].velocity = lb_vel + p.pos = ( + (i + 0.5) * AGRID, + (j + 0.5) * AGRID, + (k + 0.5) * AGRID) + p.v = [0., 0., 0.] + system.integrator.run(1) + np.testing.assert_allclose(np.copy(p.f), kernel(i, j, k)) system = self.system - system.thermostat.set_lb(LB_fluid=system.lb, seed=42, gamma=1.) system.integrator.run(1) lb_vel = np.zeros([12, 12, 12, 3], dtype=float) - for i in range(12): - for j in range(12): - for k in range(12): - lb_vel[i, j, k] = 1e-3 * np.array([i + 1, j + 1, k + 1]) + for i, j, k in itertools.product(range(12), range(12), range(12)): + lb_vel[i, j, k] = 1e-3 * np.array([i + 1, j + 1, k + 1]) p = system.part.add(pos=3 * [1.5]) - for i in range(3): - for j in range(3): - for k in range(3): - system.lb[:, :, :].velocity = lb_vel - p.pos = ( - (i + 0.5) * AGRID, - (j + 0.5) * AGRID, - (k + 0.5) * AGRID) - p.v = [0., 0., 0.] - system.integrator.run(1) - np.testing.assert_allclose(np.copy(p.f), lb_vel[i, j, k]) + # enable particle coupling + system.thermostat.set_lb(LB_fluid=system.lb, seed=42, gamma=1.) + sample(kernel=lambda i, j, k: lb_vel[i, j, k]) + # disable particle coupling + system.thermostat.set_lb(LB_fluid=system.lb, seed=42, gamma=0.) + sample(kernel=lambda i, j, k: [0., 0., 0.]) + if espressomd.has_features("THERMOSTAT_PER_PARTICLE"): + # disable particle coupling globally & enable per-particle coupling + system.thermostat.set_lb(LB_fluid=system.lb, seed=42, gamma=0.) + p.gamma = 1.5 + sample(kernel=lambda i, j, k: 1.5 * lb_vel[i, j, k]) @utx.skipIfMissingFeatures(["WALBERLA"]) diff --git a/testsuite/python/lb_pressure_tensor.py b/testsuite/python/lb_pressure_tensor.py index d4fb48b231..0c0b4df3a6 100644 --- a/testsuite/python/lb_pressure_tensor.py +++ b/testsuite/python/lb_pressure_tensor.py @@ -46,7 +46,6 @@ class TestLBPressureTensor: @classmethod def tearDownClass(cls): cls.system.lb = None - cls.system.thermostat.turn_off() @classmethod def setUpClass(cls): diff --git a/testsuite/python/lb_thermostat.py b/testsuite/python/lb_thermostat.py index 77677bb96e..53de0ea8ca 100644 --- a/testsuite/python/lb_thermostat.py +++ b/testsuite/python/lb_thermostat.py @@ -208,6 +208,8 @@ def test_exceptions(self): with self.assertRaisesRegex(RuntimeError, r"set_lb\(\) got an unexpected keyword argument 'act_on_virtual'"): self.system.thermostat.set_lb( LB_fluid=self.lbf, act_on_virtual=False) + with self.assertRaisesRegex(RuntimeError, "Parameter 'gamma' is missing"): + self.system.thermostat.set_lb(LB_fluid=self.lbf) self.system.part.add(pos=[0., 0., 0.], gamma=[1., 2., 3.], id=2) with self.assertRaisesRegex(Exception, r"ERROR: anisotropic particle \(id 2\) coupled to LB"): self.system.integrator.run(1)