From 38952c9526dde9f630e05b8d5bcba9cb42b00336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 29 Mar 2021 19:03:58 +0200 Subject: [PATCH 1/7] core: Use relative tolerance The floating-point precision depends on the size of the box. --- src/core/grid_based_algorithms/lbgpu.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index aa30a96c9ed..55d6208ef32 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -246,13 +246,14 @@ void lb_set_agrid_gpu(double agrid) { /* sanity checks */ for (int dir = 0; dir < 3; dir++) { /* check if box_l is compatible with lattice spacing */ - if (fabs(box_geo.length()[dir] - static_cast(tmp[dir]) * agrid) > - ROUND_ERROR_PREC) { - runtimeErrorMsg() << "Lattice spacing p_agrid= " << agrid + auto const box_l = static_cast(box_geo.length()[dir]); + auto const tolerance = 5.f * std::nextafter(box_l, box_l + 1.f) - box_l; + auto const difference = fabs(box_l - static_cast(tmp[dir] * agrid)); + if (difference > tolerance) { + runtimeErrorMsg() << "Lattice spacing agrid= " << agrid << " is incompatible with box_l[" << dir << "]=" << box_geo.length()[dir] - << ", factor=" << tmp[dir] << " err= " - << fabs(box_geo.length()[dir] - tmp[dir] * agrid); + << ", n_nodes=" << tmp[dir] << " err= " << difference; } } lbpar_gpu.number_of_nodes = From 0604c35b133ea19e724a228000ee16a84f8b8623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 29 Mar 2021 19:06:10 +0200 Subject: [PATCH 2/7] core: Remove code duplication --- src/core/grid_based_algorithms/lbgpu.cpp | 35 ++++-------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index 55d6208ef32..3d9f877c3b1 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -35,6 +35,8 @@ #include "integrate.hpp" #include "lb-d3q19.hpp" +#include + #include #include @@ -161,38 +163,13 @@ void lb_reinit_parameters_gpu() { /* Eq. (51) @cite dunweg07a.*/ /* Note that the modes are not normalized as in the paper here! 
*/ - lbpar_gpu.mu = lbpar_gpu.kT * lbpar_gpu.tau * lbpar_gpu.tau / - D3Q19::c_sound_sq / - (lbpar_gpu.agrid * lbpar_gpu.agrid); + lbpar_gpu.mu = lbpar_gpu.kT * Utils::sqr(lbpar_gpu.tau) / + D3Q19::c_sound_sq / Utils::sqr(lbpar_gpu.agrid); } #ifdef ELECTROKINETICS if (ek_initialized) { - lbpar_gpu.dim_x = - static_cast(round(box_geo.length()[0] / lbpar_gpu.agrid)); - lbpar_gpu.dim_y = - static_cast(round(box_geo.length()[1] / lbpar_gpu.agrid)); - lbpar_gpu.dim_z = - static_cast(round(box_geo.length()[2] / lbpar_gpu.agrid)); - - unsigned int tmp[3]; - tmp[0] = lbpar_gpu.dim_x; - tmp[1] = lbpar_gpu.dim_y; - tmp[2] = lbpar_gpu.dim_z; - - /* sanity checks */ - for (int dir = 0; dir < 3; dir++) { - /* check if box_l is compatible with lattice spacing */ - if (fabs(box_geo.length()[dir] - - static_cast(tmp[dir]) * lbpar_gpu.agrid) > 1.0e-3) { - runtimeErrorMsg() << "Lattice spacing lbpar_gpu.agrid= " - << lbpar_gpu.agrid << " is incompatible with box_l[" - << dir << "]=" << box_geo.length()[dir]; - } - } - - lbpar_gpu.number_of_nodes = - lbpar_gpu.dim_x * lbpar_gpu.dim_y * lbpar_gpu.dim_z; + lb_set_agrid_gpu(lbpar_gpu.agrid); lbpar_gpu.tau = static_cast(time_step); } #endif @@ -247,7 +224,7 @@ void lb_set_agrid_gpu(double agrid) { for (int dir = 0; dir < 3; dir++) { /* check if box_l is compatible with lattice spacing */ auto const box_l = static_cast(box_geo.length()[dir]); - auto const tolerance = 5.f * std::nextafter(box_l, box_l + 1.f) - box_l; + auto const tolerance = 5.f * (std::nextafter(box_l, box_l + 1.f) - box_l); auto const difference = fabs(box_l - static_cast(tmp[dir] * agrid)); if (difference > tolerance) { runtimeErrorMsg() << "Lattice spacing agrid= " << agrid From 8216180a6e23c8675b92329370576fc356527fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 29 Mar 2021 19:50:27 +0200 Subject: [PATCH 3/7] core: Fix LB GPU grid size bug Always regenerate the LB grid dimensions on parameter change. 
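Note: the first two patches above derive the commensurability tolerance from the spacing of representable floats around box_l instead of a fixed constant. A minimal standalone sketch of that idea, kept outside the patch and using arbitrary example values (the factor of 5 mirrors the value chosen above):

#include <cmath>
#include <cstdio>

// Gap between x and the next representable float above it (one ULP);
// the admissible absolute error therefore scales with the box size.
static float ulp_above(float x) { return std::nextafter(x, x + 1.f) - x; }

int main() {
  float const box_l = 6.0f;   // example value, not taken from the patch
  float const agrid = 0.75f;  // example value
  auto const n_nodes = std::round(box_l / agrid);
  auto const tolerance = 5.f * ulp_above(box_l);
  auto const difference = std::fabs(box_l - n_nodes * agrid);
  std::printf("err=%g tol=%g -> %s\n", difference, tolerance,
              difference > tolerance ? "incompatible" : "ok");
  return 0;
}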
--- src/core/grid_based_algorithms/lbgpu.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index 3d9f877c3b1..42b59983a0d 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -167,9 +167,10 @@ void lb_reinit_parameters_gpu() { D3Q19::c_sound_sq / Utils::sqr(lbpar_gpu.agrid); } + lb_set_agrid_gpu(lbpar_gpu.agrid); + #ifdef ELECTROKINETICS if (ek_initialized) { - lb_set_agrid_gpu(lbpar_gpu.agrid); lbpar_gpu.tau = static_cast(time_step); } #endif From 37197b382235eaa541a959a42ac557041382c8f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 29 Mar 2021 20:08:19 +0200 Subject: [PATCH 4/7] tests: Check LB grid resize --- testsuite/python/lb.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py index 59ec5c7f510..474679eaa03 100644 --- a/testsuite/python/lb.py +++ b/testsuite/python/lb.py @@ -56,6 +56,7 @@ def tearDown(self): self.system.actors.clear() self.system.part.clear() self.system.thermostat.turn_off() + self.system.box_l = 3 * [6.0] def test_properties(self): self.lbf = self.lb_class( @@ -167,10 +168,8 @@ def test_lb_node_set_get(self): ext_force_density=[0, 0, 0]) self.system.actors.add(self.lbf) - self.assertEqual(self.lbf.shape, - (int(self.system.box_l[0] / self.params["agrid"]), - int(self.system.box_l[1] / self.params["agrid"]), - int(self.system.box_l[2] / self.params["agrid"]))) + shape_ref = np.copy(self.system.box_l) / self.params['agrid'] + np.testing.assert_array_equal(self.lbf.shape, shape_ref.astype(int)) v_fluid = np.array([1.2, 4.3, 0.2]) self.lbf[0, 0, 0].velocity = v_fluid @@ -221,6 +220,12 @@ def test_grid_index(self): _ = self.lbf[0, out_of_bounds, 0].velocity with self.assertRaises(ValueError): _ = self.lbf[0, 0, out_of_bounds].velocity + # resize system + self.system.box_l = self.system.box_l + 1. + shape_ref = np.copy(self.system.box_l) / self.params['agrid'] + np.testing.assert_array_equal(self.lbf.shape, shape_ref.astype(int)) + np.testing.assert_array_equal( + np.copy(self.lbf[out_of_bounds, 0, 0].velocity), 0.) def test_incompatible_agrid(self): """ From 5a6592ca8dac82586a54c94ec7baef6dcc9b6cf2 Mon Sep 17 00:00:00 2001 From: Kai Szuttor Date: Tue, 30 Mar 2021 10:08:16 +0200 Subject: [PATCH 5/7] LB: more Array. 
--- .../electrokinetics_cuda.cu | 64 ++++---- .../grid_based_algorithms/lb_boundaries.cpp | 15 +- .../grid_based_algorithms/lb_interface.cpp | 70 ++++----- src/core/grid_based_algorithms/lbgpu.cpp | 15 +- src/core/grid_based_algorithms/lbgpu.hpp | 10 +- src/core/grid_based_algorithms/lbgpu_cuda.cu | 139 +++++++++--------- .../lb_inertialess_tracers_cuda.cu | 84 ++++++----- 7 files changed, 200 insertions(+), 197 deletions(-) diff --git a/src/core/grid_based_algorithms/electrokinetics_cuda.cu b/src/core/grid_based_algorithms/electrokinetics_cuda.cu index 119d828d0f4..ad291810c7a 100644 --- a/src/core/grid_based_algorithms/electrokinetics_cuda.cu +++ b/src/core/grid_based_algorithms/electrokinetics_cuda.cu @@ -1811,12 +1811,12 @@ ek_gather_particle_charge_density(CUDA_particle_data *particle_data, lowernode[2] = static_cast(floorf(gridpos)); cellpos[2] = gridpos - static_cast(lowernode[2]); - lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim_x) % - ek_lbparameters_gpu->dim_x; - lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim_y) % - ek_lbparameters_gpu->dim_y; - lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim_z) % - ek_lbparameters_gpu->dim_z; + lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim[0]) % + ek_lbparameters_gpu->dim[0]; + lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim[1]) % + ek_lbparameters_gpu->dim[1]; + lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim[2]) % + ek_lbparameters_gpu->dim[2]; atomicAdd(&((cufftReal *)ek_parameters_gpu ->charge_potential)[rhoindex_cartesian2linear_padded( @@ -1898,12 +1898,12 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, lowernode[2] = static_cast(floorf(gridpos)); cellpos[2] = gridpos - static_cast(lowernode[2]); - lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim_x) % - ek_lbparameters_gpu->dim_x; - lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim_y) % - ek_lbparameters_gpu->dim_y; - lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim_z) % - ek_lbparameters_gpu->dim_z; + lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim[0]) % + ek_lbparameters_gpu->dim[0]; + lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim[1]) % + ek_lbparameters_gpu->dim[1]; + lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim[2]) % + ek_lbparameters_gpu->dim[2]; float efield[3] = {0., 0., 0.}; for (unsigned int dim = 0; dim < 3; ++dim) { @@ -1921,7 +1921,7 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, ->electric_field[3 * rhoindex_cartesian2linear( lowernode[0], lowernode[1], (lowernode[2] + 1) % - ek_lbparameters_gpu->dim_z) + + ek_lbparameters_gpu->dim[2]) + dim] * (1 - cellpos[0]) * (1 - cellpos[1]) * cellpos[2]; @@ -1931,7 +1931,7 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, ->electric_field[3 * rhoindex_cartesian2linear( lowernode[0], (lowernode[1] + 1) % - ek_lbparameters_gpu->dim_y, + ek_lbparameters_gpu->dim[1], lowernode[2]) + dim] * (1 - cellpos[0]) * cellpos[1] * (1 - cellpos[2]); @@ -1941,8 +1941,8 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, ek_parameters_gpu->electric_field [3 * rhoindex_cartesian2linear( lowernode[0], - (lowernode[1] + 1) % ek_lbparameters_gpu->dim_y, - (lowernode[2] + 1) % ek_lbparameters_gpu->dim_z) + + (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1], + (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) + dim] * (1 - cellpos[0]) * cellpos[1] * cellpos[2]; @@ -1951,7 +1951,7 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, ek_parameters_gpu ->electric_field[3 * rhoindex_cartesian2linear( 
(lowernode[0] + 1) % - ek_lbparameters_gpu->dim_x, + ek_lbparameters_gpu->dim[0], lowernode[1], lowernode[2]) + dim] * cellpos[0] * (1 - cellpos[1]) * (1 - cellpos[2]); @@ -1960,9 +1960,9 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, efield[dim] += ek_parameters_gpu->electric_field [3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % ek_lbparameters_gpu->dim_x, + (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0], lowernode[1], - (lowernode[2] + 1) % ek_lbparameters_gpu->dim_z) + + (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) + dim] * cellpos[0] * (1 - cellpos[1]) * cellpos[2]; @@ -1970,8 +1970,8 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, efield[dim] += ek_parameters_gpu->electric_field [3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % ek_lbparameters_gpu->dim_x, - (lowernode[1] + 1) % ek_lbparameters_gpu->dim_y, + (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0], + (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1], lowernode[2]) + dim] * cellpos[0] * cellpos[1] * (1 - cellpos[2]); @@ -1980,9 +1980,9 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, efield[dim] += ek_parameters_gpu->electric_field [3 * rhoindex_cartesian2linear( - (lowernode[0] + 1) % ek_lbparameters_gpu->dim_x, - (lowernode[1] + 1) % ek_lbparameters_gpu->dim_y, - (lowernode[2] + 1) % ek_lbparameters_gpu->dim_z) + + (lowernode[0] + 1) % ek_lbparameters_gpu->dim[0], + (lowernode[1] + 1) % ek_lbparameters_gpu->dim[1], + (lowernode[2] + 1) % ek_lbparameters_gpu->dim[2]) + dim] * cellpos[0] * cellpos[1] * cellpos[2]; } @@ -2281,10 +2281,10 @@ int ek_init() { lbpar_gpu.ext_force_density[2] = 0; } - ek_parameters.dim_x = lbpar_gpu.dim_x; + ek_parameters.dim_x = lbpar_gpu.dim[0]; ek_parameters.dim_x_padded = (ek_parameters.dim_x / 2 + 1) * 2; - ek_parameters.dim_y = lbpar_gpu.dim_y; - ek_parameters.dim_z = lbpar_gpu.dim_z; + ek_parameters.dim_y = lbpar_gpu.dim[1]; + ek_parameters.dim_z = lbpar_gpu.dim[2]; ek_parameters.time_step = lbpar_gpu.time_step; ek_parameters.number_of_nodes = ek_parameters.dim_x * ek_parameters.dim_y * ek_parameters.dim_z; @@ -2469,7 +2469,7 @@ SPACING %f %f %f\n\ \nPOINT_DATA %u\n\ SCALARS velocity float 3\n\ LOOKUP_TABLE default\n", - lbpar_gpu.dim_x, lbpar_gpu.dim_y, lbpar_gpu.dim_z, + lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2], lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); @@ -2526,7 +2526,7 @@ SPACING %f %f %f\n\ POINT_DATA %u\n\ SCALARS density_lb float 1\n\ LOOKUP_TABLE default\n", - lbpar_gpu.dim_x, lbpar_gpu.dim_y, lbpar_gpu.dim_z, + lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2], lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); @@ -3656,9 +3656,9 @@ void ek_print_lbpar() { printf(" float tau = %f;\n", lbpar_gpu.tau); printf(" float time_step = %f;\n", lbpar_gpu.time_step); printf(" float bulk_viscosity = %f;\n", lbpar_gpu.bulk_viscosity); - printf(" unsigned int dim_x = %d;\n", lbpar_gpu.dim_x); - printf(" unsigned int dim_y = %d;\n", lbpar_gpu.dim_y); - printf(" unsigned int dim_z = %d;\n", lbpar_gpu.dim_z); + printf(" unsigned int dim_x = %d;\n", lbpar_gpu.dim[0]); + printf(" unsigned int dim_y = %d;\n", lbpar_gpu.dim[1]); + printf(" unsigned int dim_z = %d;\n", lbpar_gpu.dim[2]); printf(" unsigned int number_of_nodes = %d;\n", lbpar_gpu.number_of_nodes); printf(" int calc_val = %d;\n", lbpar_gpu.calc_val); printf(" 
int external_force_density = %d;\n", diff --git a/src/core/grid_based_algorithms/lb_boundaries.cpp b/src/core/grid_based_algorithms/lb_boundaries.cpp index 3f7338ef2b3..031b133f039 100644 --- a/src/core/grid_based_algorithms/lb_boundaries.cpp +++ b/src/core/grid_based_algorithms/lb_boundaries.cpp @@ -107,9 +107,9 @@ void ek_init_boundaries() { << "no charged species available to create wall charge\n"; } - for (int z = 0; z < int(lbpar_gpu.dim_z); z++) { - for (int y = 0; y < int(lbpar_gpu.dim_y); y++) { - for (int x = 0; x < int(lbpar_gpu.dim_x); x++) { + for (int z = 0; z < int(lbpar_gpu.dim[2]); z++) { + for (int y = 0; y < int(lbpar_gpu.dim[1]); y++) { + for (int x = 0; x < int(lbpar_gpu.dim[0]); x++) { auto const pos = static_cast(lbpar_gpu.agrid) * (Utils::Vector3d{1. * x, 1. * y, 1. * z} + Utils::Vector3d::broadcast(0.5)); @@ -166,9 +166,9 @@ void lb_init_boundaries() { std::vector host_boundary_index_list; size_t size_of_index; - for (int z = 0; z < int(lbpar_gpu.dim_z); z++) { - for (int y = 0; y < int(lbpar_gpu.dim_y); y++) { - for (int x = 0; x < int(lbpar_gpu.dim_x); x++) { + for (int z = 0; z < int(lbpar_gpu.dim[2]); z++) { + for (int y = 0; y < int(lbpar_gpu.dim[1]); y++) { + for (int x = 0; x < int(lbpar_gpu.dim[0]); x++) { auto const pos = static_cast(lbpar_gpu.agrid) * (Utils::Vector3d{1. * x, 1. * y, 1. * z} + Utils::Vector3d::broadcast(0.5)); @@ -184,7 +184,8 @@ void lb_init_boundaries() { host_boundary_node_list.resize(size_of_index); host_boundary_index_list.resize(size_of_index); host_boundary_node_list[number_of_boundnodes] = - x + lbpar_gpu.dim_x * y + lbpar_gpu.dim_x * lbpar_gpu.dim_y * z; + x + lbpar_gpu.dim[0] * y + + lbpar_gpu.dim[0] * lbpar_gpu.dim[1] * z; auto const boundary_number = std::distance(lbboundaries.begin(), boundary.base()) - 1; host_boundary_index_list[number_of_boundnodes] = diff --git a/src/core/grid_based_algorithms/lb_interface.cpp b/src/core/grid_based_algorithms/lb_interface.cpp index b0ce22b7d7c..79051599183 100644 --- a/src/core/grid_based_algorithms/lb_interface.cpp +++ b/src/core/grid_based_algorithms/lb_interface.cpp @@ -520,7 +520,7 @@ void lb_lbfluid_print_vtk_boundary(const std::string &filename) { "ASCII\nDATASET STRUCTURED_POINTS\nDIMENSIONS %u %u %u\n" "ORIGIN %f %f %f\nSPACING %f %f %f\nPOINT_DATA %u\n" "SCALARS boundary float 1\nLOOKUP_TABLE default\n", - lbpar_gpu.dim_x, lbpar_gpu.dim_y, lbpar_gpu.dim_z, + lbpar_gpu.dim[0], lbpar_gpu.dim[1], lbpar_gpu.dim[2], lbpar_gpu.agrid * 0.5, lbpar_gpu.agrid * 0.5, lbpar_gpu.agrid * 0.5, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); @@ -606,8 +606,8 @@ void lb_lbfluid_print_vtk_velocity(const std::string &filename, for (pos[1] = bb_low[1]; pos[1] < bb_high[1]; pos[1]++) for (pos[0] = bb_low[0]; pos[0] < bb_high[0]; pos[0]++) { auto const j = - static_cast(lbpar_gpu.dim_y * lbpar_gpu.dim_x * pos[2] + - lbpar_gpu.dim_x * pos[1] + pos[0]); + static_cast(lbpar_gpu.dim[0] * lbpar_gpu.dim[0] * pos[2] + + lbpar_gpu.dim[0] * pos[1] + pos[0]); fprintf(fp, "%f %f %f\n", host_values[j].v[0] * lattice_speed, host_values[j].v[1] * lattice_speed, host_values[j].v[2] * lattice_speed); @@ -651,10 +651,10 @@ void lb_lbfluid_print_boundary(const std::string &filename) { Utils::Vector3i xyz; for (int j = 0; j < static_cast(lbpar_gpu.number_of_nodes); ++j) { - xyz[0] = j % lbpar_gpu.dim_x; - auto k = j / lbpar_gpu.dim_x; - xyz[1] = k % lbpar_gpu.dim_y; - k /= lbpar_gpu.dim_y; + xyz[0] = j % lbpar_gpu.dim[0]; + auto k = j / lbpar_gpu.dim[0]; + xyz[1] = k % lbpar_gpu.dim[1]; + k 
/= lbpar_gpu.dim[1]; xyz[2] = k; fprintf(fp, "%f %f %f %u\n", (xyz[0] + 0.5) * lbpar_gpu.agrid, (xyz[1] + 0.5) * lbpar_gpu.agrid, @@ -695,10 +695,10 @@ void lb_lbfluid_print_velocity(const std::string &filename) { Utils::Vector3i xyz; int j; for (j = 0; j < int(lbpar_gpu.number_of_nodes); ++j) { - xyz[0] = j % lbpar_gpu.dim_x; - auto k = j / lbpar_gpu.dim_x; - xyz[1] = k % lbpar_gpu.dim_y; - k /= lbpar_gpu.dim_y; + xyz[0] = j % lbpar_gpu.dim[0]; + auto k = j / lbpar_gpu.dim[0]; + xyz[1] = k % lbpar_gpu.dim[1]; + k /= lbpar_gpu.dim[1]; xyz[2] = k; fprintf(fp, "%f %f %f %f %f %f\n", (xyz[0] + 0.5) * agrid, (xyz[1] + 0.5) * agrid, (xyz[2] + 0.5) * agrid, @@ -734,21 +734,21 @@ void lb_lbfluid_save_checkpoint(const std::string &filename, bool binary) { std::fstream cpfile(filename, std::ios::out); cpfile << std::fixed; cpfile.precision(8); - cpfile << lbpar_gpu.dim_x << " "; - cpfile << lbpar_gpu.dim_y << " "; - cpfile << lbpar_gpu.dim_z << "\n"; + cpfile << lbpar_gpu.dim[0] << " "; + cpfile << lbpar_gpu.dim[1] << " "; + cpfile << lbpar_gpu.dim[2] << "\n"; for (int n = 0; n < (19 * int(lbpar_gpu.number_of_nodes)); n++) { cpfile << host_checkpoint_vd[n] << "\n"; } cpfile.close(); } else { std::fstream cpfile(filename, std::ios::out | std::ios::binary); - cpfile.write(reinterpret_cast(&lbpar_gpu.dim_x), - sizeof(lbpar_gpu.dim_x)); - cpfile.write(reinterpret_cast(&lbpar_gpu.dim_y), - sizeof(lbpar_gpu.dim_y)); - cpfile.write(reinterpret_cast(&lbpar_gpu.dim_z), - sizeof(lbpar_gpu.dim_z)); + cpfile.write(reinterpret_cast(&lbpar_gpu.dim[0]), + sizeof(lbpar_gpu.dim[0])); + cpfile.write(reinterpret_cast(&lbpar_gpu.dim[1]), + sizeof(lbpar_gpu.dim[1])); + cpfile.write(reinterpret_cast(&lbpar_gpu.dim[2]), + sizeof(lbpar_gpu.dim[2])); cpfile.write(reinterpret_cast(host_checkpoint_vd.data()), 19 * sizeof(float) * lbpar_gpu.number_of_nodes); cpfile.close(); @@ -818,18 +818,18 @@ void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary) { throw std::runtime_error(err_msg + "incorrectly formatted data."); } } - if (saved_gridsize[0] != lbpar_gpu.dim_x || - saved_gridsize[1] != lbpar_gpu.dim_y || - saved_gridsize[2] != lbpar_gpu.dim_z) { + if (saved_gridsize[0] != lbpar_gpu.dim[0] || + saved_gridsize[1] != lbpar_gpu.dim[1] || + saved_gridsize[2] != lbpar_gpu.dim[2]) { fclose(cpfile); throw std::runtime_error(err_msg + "grid dimensions mismatch, read [" + std::to_string(saved_gridsize[0]) + ' ' + std::to_string(saved_gridsize[1]) + ' ' + std::to_string(saved_gridsize[2]) + "], expected [" + - std::to_string(lbpar_gpu.dim_x) + ' ' + - std::to_string(lbpar_gpu.dim_y) + ' ' + - std::to_string(lbpar_gpu.dim_z) + "]."); + std::to_string(lbpar_gpu.dim[0]) + ' ' + + std::to_string(lbpar_gpu.dim[1]) + ' ' + + std::to_string(lbpar_gpu.dim[2]) + "]."); } for (int n = 0; n < (19 * int(lbpar_gpu.number_of_nodes)); n++) { res = fscanf(cpfile, "%f", &host_checkpoint_vd[n]); @@ -848,18 +848,18 @@ void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary) { fclose(cpfile); throw std::runtime_error(err_msg + "incorrectly formatted data."); } - if (saved_gridsize[0] != lbpar_gpu.dim_x || - saved_gridsize[1] != lbpar_gpu.dim_y || - saved_gridsize[2] != lbpar_gpu.dim_z) { + if (saved_gridsize[0] != lbpar_gpu.dim[0] || + saved_gridsize[1] != lbpar_gpu.dim[1] || + saved_gridsize[2] != lbpar_gpu.dim[2]) { fclose(cpfile); throw std::runtime_error(err_msg + "grid dimensions mismatch, read [" + std::to_string(saved_gridsize[0]) + ' ' + std::to_string(saved_gridsize[1]) + ' ' + 
std::to_string(saved_gridsize[2]) + "], expected [" + - std::to_string(lbpar_gpu.dim_x) + ' ' + - std::to_string(lbpar_gpu.dim_y) + ' ' + - std::to_string(lbpar_gpu.dim_z) + "]."); + std::to_string(lbpar_gpu.dim[0]) + ' ' + + std::to_string(lbpar_gpu.dim[1]) + ' ' + + std::to_string(lbpar_gpu.dim[2]) + "]."); } if (fread(host_checkpoint_vd.data(), sizeof(float), 19 * int(lbpar_gpu.number_of_nodes), @@ -981,9 +981,9 @@ void lb_lbfluid_load_checkpoint(const std::string &filename, bool binary) { Utils::Vector3i lb_lbfluid_get_shape() { if (lattice_switch == ActiveLB::GPU) { #ifdef CUDA - return {static_cast(lbpar_gpu.dim_x), - static_cast(lbpar_gpu.dim_y), - static_cast(lbpar_gpu.dim_z)}; + return {static_cast(lbpar_gpu.dim[0]), + static_cast(lbpar_gpu.dim[1]), + static_cast(lbpar_gpu.dim[2])}; #endif } if (lattice_switch == ActiveLB::CPU) { diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index 42b59983a0d..02d2d4715e2 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -211,16 +211,14 @@ void lb_GPU_sanity_checks() { void lb_set_agrid_gpu(double agrid) { lbpar_gpu.agrid = static_cast(agrid); - lbpar_gpu.dim_x = + lbpar_gpu.dim[0] = static_cast(round(box_geo.length()[0] / agrid)); - lbpar_gpu.dim_y = + lbpar_gpu.dim[1] = static_cast(round(box_geo.length()[1] / agrid)); - lbpar_gpu.dim_z = + lbpar_gpu.dim[2] = static_cast(round(box_geo.length()[2] / agrid)); - unsigned int tmp[3]; - tmp[0] = lbpar_gpu.dim_x; - tmp[1] = lbpar_gpu.dim_y; - tmp[2] = lbpar_gpu.dim_z; + Utils::Vector tmp(lbpar_gpu.dim); + /* sanity checks */ for (int dir = 0; dir < 3; dir++) { /* check if box_l is compatible with lattice spacing */ @@ -235,7 +233,8 @@ void lb_set_agrid_gpu(double agrid) { } } lbpar_gpu.number_of_nodes = - lbpar_gpu.dim_x * lbpar_gpu.dim_y * lbpar_gpu.dim_z; + std::accumulate(lbpar_gpu.dim.begin(), lbpar_gpu.dim.end(), 1u, + std::multiplies()); } #endif /* CUDA */ diff --git a/src/core/grid_based_algorithms/lbgpu.hpp b/src/core/grid_based_algorithms/lbgpu.hpp index 3daf39e0bec..7ae00165a11 100644 --- a/src/core/grid_based_algorithms/lbgpu.hpp +++ b/src/core/grid_based_algorithms/lbgpu.hpp @@ -83,9 +83,7 @@ struct LB_parameters_gpu { /** MD timestep */ float time_step; - unsigned int dim_x; - unsigned int dim_y; - unsigned int dim_z; + Utils::Array dim; unsigned int number_of_nodes; #ifdef LB_BOUNDARIES_GPU @@ -233,10 +231,8 @@ void lb_coupling_set_rng_state_gpu(uint64_t counter); /** Calculate the node index from its coordinates */ inline unsigned int calculate_node_index(LB_parameters_gpu const &lbpar, Utils::Vector3i const &coord) { - return static_cast(Utils::get_linear_index( - coord, Utils::Vector3i{static_cast(lbpar.dim_x), - static_cast(lbpar.dim_y), - static_cast(lbpar.dim_z)})); + return static_cast( + Utils::get_linear_index(coord, Utils::Vector3i(lbpar_gpu.dim))); } /**@}*/ diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu index 230ce067f55..c5775346e46 100644 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ b/src/core/grid_based_algorithms/lbgpu_cuda.cu @@ -126,10 +126,10 @@ OptionalCounter rng_counter_fluid_gpu; * @param[in] index Node index / thread index */ template __device__ uint3 index_to_xyz(T index) { - auto const x = index % para->dim_x; - index /= para->dim_x; - auto const y = index % para->dim_y; - index /= para->dim_y; + auto const x = index % para->dim[0]; + index /= para->dim[0]; + auto const y = index % 
para->dim[1]; + index /= para->dim[1]; auto const z = index; return {x, y, z}; } @@ -139,7 +139,7 @@ template __device__ uint3 index_to_xyz(T index) { */ template __device__ T xyz_to_index(T x, T y, T z) { return x + - static_cast(para->dim_x) * (y + static_cast(para->dim_y) * z); + static_cast(para->dim[0]) * (y + static_cast(para->dim[1]) * z); } /** Calculate modes from the populations (space-transform). @@ -503,127 +503,127 @@ __device__ void calc_n_from_modes_push(LB_nodes_gpu n_b, unsigned int y = xyz.y; unsigned int z = xyz.z; - n_b.populations[x + para->dim_x * y + para->dim_x * para->dim_y * z][0] = + n_b.populations[x + para->dim[0] * y + para->dim[0] * para->dim[1] * z][0] = 1.0f / 3.0f * (mode[0] - mode[4] + mode[16]); - n_b.populations[(x + 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * z][1] = + n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y + + para->dim[0] * para->dim[1] * z][1] = 1.0f / 18.0f * (mode[0] + mode[1] + mode[5] + mode[6] - mode[17] - mode[18] - 2.0f * (mode[10] + mode[16])); - n_b.populations[(para->dim_x + x - 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * z][2] = + n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y + + para->dim[0] * para->dim[1] * z][2] = 1.0f / 18.0f * (mode[0] - mode[1] + mode[5] + mode[6] - mode[17] - mode[18] + 2.0f * (mode[10] - mode[16])); - n_b.populations[x + para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * z][3] = + n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * z][3] = 1.0f / 18.0f * (mode[0] + mode[2] - mode[5] + mode[6] + mode[17] - mode[18] - 2.0f * (mode[11] + mode[16])); - n_b.populations[x + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * z][4] = + n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * z][4] = 1.0f / 18.0f * (mode[0] - mode[2] - mode[5] + mode[6] + mode[17] - mode[18] + 2.0f * (mode[11] - mode[16])); - n_b.populations[x + para->dim_x * y + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)][5] = + n_b.populations[x + para->dim[0] * y + + para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][5] = 1.0f / 18.0f * (mode[0] + mode[3] - 2.0f * (mode[6] + mode[12] + mode[16] - mode[18])); - n_b.populations[x + para->dim_x * y + - para->dim_x * para->dim_y * - ((para->dim_z + z - 1) % para->dim_z)][6] = + n_b.populations[x + para->dim[0] * y + + para->dim[0] * para->dim[1] * + ((para->dim[2] + z - 1) % para->dim[2])][6] = 1.0f / 18.0f * (mode[0] - mode[3] - 2.0f * (mode[6] - mode[12] + mode[16] - mode[18])); - n_b.populations[(x + 1) % para->dim_x + - para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * z][7] = + n_b.populations[(x + 1) % para->dim[0] + + para->dim[0] * ((y + 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * z][7] = 1.0f / 36.0f * (mode[0] + mode[1] + mode[2] + mode[4] + 2.0f * mode[6] + mode[7] + mode[10] + mode[11] + mode[13] + mode[14] + mode[16] + 2.0f * mode[18]); - n_b.populations[(para->dim_x + x - 1) % para->dim_x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * z][8] = + n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * z][8] = 1.0f / 36.0f * (mode[0] - mode[1] - mode[2] + mode[4] + 2.0f * mode[6] + mode[7] - mode[10] - mode[11] - mode[13] - mode[14] + mode[16] + 2.0f * mode[18]); - n_b.populations[(x + 1) 
% para->dim_x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * z][9] = + n_b.populations[(x + 1) % para->dim[0] + + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * z][9] = 1.0f / 36.0f * (mode[0] + mode[1] - mode[2] + mode[4] + 2.0f * mode[6] - mode[7] + mode[10] - mode[11] + mode[13] - mode[14] + mode[16] + 2.0f * mode[18]); - n_b.populations[(para->dim_x + x - 1) % para->dim_x + - para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * z][10] = + n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + + para->dim[0] * ((y + 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * z][10] = 1.0f / 36.0f * (mode[0] - mode[1] + mode[2] + mode[4] + 2.0f * mode[6] - mode[7] - mode[10] + mode[11] - mode[13] + mode[14] + mode[16] + 2.0f * mode[18]); - n_b.populations[(x + 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)][11] = + n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y + + para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][11] = 1.0f / 36.0f * (mode[0] + mode[1] + mode[3] + mode[4] + mode[5] - mode[6] + mode[8] + mode[10] + mode[12] - mode[13] + mode[15] + mode[16] + mode[17] - mode[18]); - n_b.populations[(para->dim_x + x - 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * - ((para->dim_z + z - 1) % para->dim_z)][12] = + n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y + + para->dim[0] * para->dim[1] * + ((para->dim[2] + z - 1) % para->dim[2])][12] = 1.0f / 36.0f * (mode[0] - mode[1] - mode[3] + mode[4] + mode[5] - mode[6] + mode[8] - mode[10] - mode[12] + mode[13] - mode[15] + mode[16] + mode[17] - mode[18]); - n_b.populations[(x + 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * - ((para->dim_z + z - 1) % para->dim_z)][13] = + n_b.populations[(x + 1) % para->dim[0] + para->dim[0] * y + + para->dim[0] * para->dim[1] * + ((para->dim[2] + z - 1) % para->dim[2])][13] = 1.0f / 36.0f * (mode[0] + mode[1] - mode[3] + mode[4] + mode[5] - mode[6] - mode[8] + mode[10] - mode[12] - mode[13] - mode[15] + mode[16] + mode[17] - mode[18]); - n_b.populations[(para->dim_x + x - 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)][14] = + n_b.populations[(para->dim[0] + x - 1) % para->dim[0] + para->dim[0] * y + + para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][14] = 1.0f / 36.0f * (mode[0] - mode[1] + mode[3] + mode[4] + mode[5] - mode[6] - mode[8] - mode[10] + mode[12] + mode[13] + mode[15] + mode[16] + mode[17] - mode[18]); - n_b.populations[x + para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)][15] = + n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][15] = 1.0f / 36.0f * (mode[0] + mode[2] + mode[3] + mode[4] - mode[5] - mode[6] + mode[9] + mode[11] + mode[12] - mode[14] - mode[15] + mode[16] - mode[17] - mode[18]); - n_b.populations[x + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * - ((para->dim_z + z - 1) % para->dim_z)][16] = + n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * + ((para->dim[2] + z - 1) % para->dim[2])][16] = 1.0f / 36.0f * (mode[0] - mode[2] - mode[3] + mode[4] - mode[5] - mode[6] + mode[9] - mode[11] - mode[12] + mode[14] + mode[15] + mode[16] - mode[17] - mode[18]); - n_b.populations[x + para->dim_x * ((y + 1) % 
para->dim_y) + - para->dim_x * para->dim_y * - ((para->dim_z + z - 1) % para->dim_z)][17] = + n_b.populations[x + para->dim[0] * ((y + 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * + ((para->dim[2] + z - 1) % para->dim[2])][17] = 1.0f / 36.0f * (mode[0] + mode[2] - mode[3] + mode[4] - mode[5] - mode[6] - mode[9] + mode[11] - mode[12] - mode[14] + mode[15] + mode[16] - mode[17] - mode[18]); - n_b.populations[x + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)][18] = + n_b.populations[x + para->dim[0] * ((para->dim[1] + y - 1) % para->dim[1]) + + para->dim[0] * para->dim[1] * ((z + 1) % para->dim[2])][18] = 1.0f / 36.0f * (mode[0] - mode[2] + mode[3] + mode[4] - mode[5] - mode[6] - mode[9] - mode[11] + mode[12] + mode[14] - mode[15] + mode[16] - mode[17] - @@ -673,11 +673,14 @@ __device__ void bounce_back_boundaries(LB_nodes_gpu n_curr, (v[0] * static_cast(c[0]) + v[1] * static_cast(c[1]) + \ v[2] * static_cast(c[2])); \ pop_to_bounce_back = n_curr.populations[index][population]; \ - to_index_x = (x + static_cast(c[0]) + para->dim_x) % para->dim_x; \ - to_index_y = (y + static_cast(c[1]) + para->dim_y) % para->dim_y; \ - to_index_z = (z + static_cast(c[2]) + para->dim_z) % para->dim_z; \ - to_index = to_index_x + para->dim_x * to_index_y + \ - para->dim_x * para->dim_y * to_index_z; \ + to_index_x = \ + (x + static_cast(c[0]) + para->dim[0]) % para->dim[0]; \ + to_index_y = \ + (y + static_cast(c[1]) + para->dim[1]) % para->dim[1]; \ + to_index_z = \ + (z + static_cast(c[2]) + para->dim[2]) % para->dim[2]; \ + to_index = to_index_x + para->dim[0] * to_index_y + \ + para->dim[0] * para->dim[1] * to_index_z; \ if (n_curr.boundary[to_index] == 0) { \ boundary_force[0] += \ (2.0f * pop_to_bounce_back + shift) * static_cast(c[0]); \ @@ -1149,11 +1152,11 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, #pragma unroll 1 for (int k = 0; k < 3; ++k) { auto const x = fold_if_necessary(center_node_index[0] - 1 + i, - static_cast(para->dim_x)); + static_cast(para->dim[0])); auto const y = fold_if_necessary(center_node_index[1] - 1 + j, - static_cast(para->dim_y)); + static_cast(para->dim[1])); auto const z = fold_if_necessary(center_node_index[2] - 1 + k, - static_cast(para->dim_z)); + static_cast(para->dim[2])); delta[cnt] = temp_delta[i].x * temp_delta[j].y * temp_delta[k].z; auto const index = static_cast(xyz_to_index(x, y, z)); node_indices[cnt] = index; @@ -1204,18 +1207,18 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, // modulo for negative numbers is strange at best, shift to make sure we are // positive - int const x = (left_node_index[0] + static_cast(para->dim_x)) % - static_cast(para->dim_x); - int const y = (left_node_index[1] + static_cast(para->dim_y)) % - static_cast(para->dim_y); - int const z = (left_node_index[2] + static_cast(para->dim_z)) % - static_cast(para->dim_z); + int const x = (left_node_index[0] + static_cast(para->dim[0])) % + static_cast(para->dim[0]); + int const y = (left_node_index[1] + static_cast(para->dim[1])) % + static_cast(para->dim[1]); + int const z = (left_node_index[2] + static_cast(para->dim[2])) % + static_cast(para->dim[2]); auto fold_if_necessary = [](int ind, int dim) { return ind >= dim ? 
ind % dim : ind; }; - auto const xp1 = fold_if_necessary(x + 1, static_cast(para->dim_x)); - auto const yp1 = fold_if_necessary(y + 1, static_cast(para->dim_y)); - auto const zp1 = fold_if_necessary(z + 1, static_cast(para->dim_z)); + auto const xp1 = fold_if_necessary(x + 1, static_cast(para->dim[0])); + auto const yp1 = fold_if_necessary(y + 1, static_cast(para->dim[1])); + auto const zp1 = fold_if_necessary(z + 1, static_cast(para->dim[2])); node_index[0] = static_cast(xyz_to_index(x, y, z)); node_index[1] = static_cast(xyz_to_index(xp1, y, z)); node_index[2] = static_cast(xyz_to_index(x, yp1, z)); diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu b/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu index f0ea8653599..43d20db4c71 100644 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu +++ b/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu @@ -110,26 +110,28 @@ ForcesIntoFluid_Kernel(const IBM_CUDA_ParticleDataInput *const particle_input, // modulo for negative numbers is strange at best, shift to make sure we are // positive - auto const x = static_cast(my_left[0] + para.dim_x); - auto const y = static_cast(my_left[1] + para.dim_y); - auto const z = static_cast(my_left[2] + para.dim_z); - - node_index[0] = x % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[1] = (x + 1) % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[2] = x % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[3] = (x + 1) % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[4] = x % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); - node_index[5] = (x + 1) % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); - node_index[6] = x % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); - node_index[7] = (x + 1) % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); + auto const x = static_cast(my_left[0] + para.dim[0]); + auto const y = static_cast(my_left[1] + para.dim[1]); + auto const z = static_cast(my_left[2] + para.dim[2]); + + node_index[0] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[1] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[2] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[3] = (x + 1) % para.dim[0] + + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[4] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); + node_index[5] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); + node_index[6] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); + node_index[7] = (x + 1) % para.dim[0] + + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); for (int i = 0; i < 8; ++i) { // Atomic add is essential because this runs in parallel! 
@@ -192,26 +194,28 @@ __global__ void ParticleVelocitiesFromLB_Kernel( // modulo for negative numbers is strange at best, shift to make sure we are // positive - auto const x = static_cast(my_left[0] + para.dim_x); - auto const y = static_cast(my_left[1] + para.dim_y); - auto const z = static_cast(my_left[2] + para.dim_z); - - node_index[0] = x % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[1] = (x + 1) % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[2] = x % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[3] = (x + 1) % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * (z % para.dim_z); - node_index[4] = x % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); - node_index[5] = (x + 1) % para.dim_x + para.dim_x * (y % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); - node_index[6] = x % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); - node_index[7] = (x + 1) % para.dim_x + para.dim_x * ((y + 1) % para.dim_y) + - para.dim_x * para.dim_y * ((z + 1) % para.dim_z); + auto const x = static_cast(my_left[0] + para.dim[0]); + auto const y = static_cast(my_left[1] + para.dim[1]); + auto const z = static_cast(my_left[2] + para.dim[2]); + + node_index[0] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[1] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[2] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[3] = (x + 1) % para.dim[0] + + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * (z % para.dim[2]); + node_index[4] = x % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); + node_index[5] = (x + 1) % para.dim[0] + para.dim[0] * (y % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); + node_index[6] = x % para.dim[0] + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); + node_index[7] = (x + 1) % para.dim[0] + + para.dim[0] * ((y + 1) % para.dim[1]) + + para.dim[0] * para.dim[1] * ((z + 1) % para.dim[2]); for (int i = 0; i < 8; ++i) { double local_rho; From 5cbb4fa41cf91979e96ded72895339d1004bd386 Mon Sep 17 00:00:00 2001 From: Kai Szuttor Date: Tue, 30 Mar 2021 10:46:17 +0200 Subject: [PATCH 6/7] LB: simplify the check for commensurability. 
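The per-direction loop is replaced below by a single vectorized check against a fixed multiple of machine epsilon. The shape of that check, written here with std::array instead of Utils::Vector and with example values, is roughly:

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  float const agrid = 0.75f;                            // example spacing
  std::array<unsigned int, 3> const dim{{8u, 8u, 8u}};  // rounded grid
  std::array<float, 3> const box_l{{6.f, 6.f, 6.f}};    // example box

  // Reconstruct the box from the grid and compare component-wise.
  std::array<float, 3> diff{};
  for (std::size_t i = 0; i < 3; ++i)
    diff[i] = box_l[i] - static_cast<float>(dim[i]) * agrid;

  auto const commensurable =
      std::all_of(diff.begin(), diff.end(), [](float d) {
        return std::abs(d) < 50.f * std::numeric_limits<float>::epsilon();
      });
  std::printf("commensurable: %s\n", commensurable ? "yes" : "no");
  return 0;
}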
--- src/core/grid_based_algorithms/lbgpu.cpp | 29 ++++++++++++------------ 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index 02d2d4715e2..545dd6ab256 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -217,20 +217,21 @@ void lb_set_agrid_gpu(double agrid) { static_cast(round(box_geo.length()[1] / agrid)); lbpar_gpu.dim[2] = static_cast(round(box_geo.length()[2] / agrid)); - Utils::Vector tmp(lbpar_gpu.dim); - - /* sanity checks */ - for (int dir = 0; dir < 3; dir++) { - /* check if box_l is compatible with lattice spacing */ - auto const box_l = static_cast(box_geo.length()[dir]); - auto const tolerance = 5.f * (std::nextafter(box_l, box_l + 1.f) - box_l); - auto const difference = fabs(box_l - static_cast(tmp[dir] * agrid)); - if (difference > tolerance) { - runtimeErrorMsg() << "Lattice spacing agrid= " << agrid - << " is incompatible with box_l[" << dir - << "]=" << box_geo.length()[dir] - << ", n_nodes=" << tmp[dir] << " err= " << difference; - } + + Utils::Vector box_from_dim( + Utils::Vector(lbpar_gpu.dim) * agrid); + Utils::Vector box_lf(box_geo.length()); + + auto const difference_vec = box_lf - box_from_dim; + auto const commensurable = + std::all_of(difference_vec.begin(), difference_vec.end(), [](auto d) { + return std::abs(d) < 50 * std::numeric_limits::epsilon(); + }); + if (not commensurable) { + runtimeErrorMsg() << "Lattice spacing agrid= " << agrid + << " is incompatible with one of the box dimensions: " + << box_geo.length()[0] << " " << box_geo.length()[1] + << " " << box_geo.length()[2]; } lbpar_gpu.number_of_nodes = std::accumulate(lbpar_gpu.dim.begin(), lbpar_gpu.dim.end(), 1u, From 720c688ca7ce5998535d3e5454d1b3c14398b2d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 30 Mar 2021 14:48:45 +0200 Subject: [PATCH 7/7] core: Use relative error with fixed tolerance Now the CPU and GPU implementations of LB have a similar tolerance. 
--- src/core/grid_based_algorithms/lbgpu.cpp | 10 ++++++---- testsuite/python/lb.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index 545dd6ab256..80951abae91 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -38,6 +38,7 @@ #include #include +#include #include LB_parameters_gpu lbpar_gpu = { @@ -222,10 +223,11 @@ void lb_set_agrid_gpu(double agrid) { Utils::Vector(lbpar_gpu.dim) * agrid); Utils::Vector box_lf(box_geo.length()); - auto const difference_vec = box_lf - box_from_dim; - auto const commensurable = - std::all_of(difference_vec.begin(), difference_vec.end(), [](auto d) { - return std::abs(d) < 50 * std::numeric_limits::epsilon(); + auto const rel_difference_vec = + Utils::hadamard_division(box_lf - box_from_dim, box_lf); + auto const commensurable = std::all_of( + rel_difference_vec.begin(), rel_difference_vec.end(), [](auto d) { + return std::abs(d) < std::numeric_limits::epsilon(); }); if (not commensurable) { runtimeErrorMsg() << "Lattice spacing agrid= " << agrid diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py index 474679eaa03..9ba10384f39 100644 --- a/testsuite/python/lb.py +++ b/testsuite/python/lb.py @@ -235,7 +235,7 @@ def test_incompatible_agrid(self): self.lbf = self.lb_class( visc=self.params['viscosity'], dens=self.params['dens'], - agrid=self.params['agrid'] + 1e-5, + agrid=self.params['agrid'] + 1e-6, tau=self.system.time_step, ext_force_density=[0, 0, 0]) print("\nTesting LB error messages:", file=sys.stderr)