diff --git a/.clang-tidy b/.clang-tidy index 5a2b0fed5dc..07b79a9504f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -3,6 +3,7 @@ Checks: | clang-diagnostic-*, clang-analyzer-*, -clang-analyzer-core.NullDereference, + -clang-analyzer-core.uninitialized.UndefReturn, -clang-analyzer-optin.mpi.MPI-Checker, -clang-analyzer-security.FloatLoopCounter, bugprone-*, diff --git a/.github/actions/build_and_check/action.yml b/.github/actions/build_and_check/action.yml new file mode 100644 index 00000000000..e9ea1f140b5 --- /dev/null +++ b/.github/actions/build_and_check/action.yml @@ -0,0 +1,31 @@ +name: 'Build and check' +description: 'Build espresso and run checks' +inputs: + asan: # id of input + description: 'Whether to build with address sanitizer' + required: true + default: 'false' + ubsan: + description: 'Whether to build with undefined behavior sanitizer' + required: true + default: 'false' + check_skip_long: # id of input + description: 'Whether to skip long python tests' + required: true + default: 'false' +runs: + using: "composite" + steps: + - run: | + brew install boost boost-mpi fftw + brew install hdf5-mpi + pip3 install numpy cython h5py scipy + shell: bash + - run: | + export myconfig=maxset with_cuda=false test_timeout=600 with_asan=${{ inputs.asan }} with_ubsan=${{ inputs.ubsan }} check_skip_long=${{ inputs.check_skip_long }} + bash maintainer/CI/build_cmake.sh + shell: bash + # This is a workaround for the unfortunate interaction of MacOS and OpenMPI 4 + # See https://github.com/open-mpi/ompi/issues/6518 + env: + OMPI_MCA_btl: "self,tcp" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 8378f07f207..00000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: run tests on mac - -on: - push: - pull_request: - -jobs: - github_mactest: - runs-on: macos-latest - steps: - - uses: actions/checkout@main - - uses: actions/setup-python@v2 - with: - python-version: '3.7' - - run: | - brew 
install boost boost-mpi fftw - brew install hdf5-mpi - pip3 install numpy cython h5py scipy - - run: | - export myconfig=maxset with_cuda=false test_timeout=600 - bash maintainer/CI/build_cmake.sh diff --git a/.github/workflows/push_pull.yml b/.github/workflows/push_pull.yml new file mode 100644 index 00000000000..b75a93e92d4 --- /dev/null +++ b/.github/workflows/push_pull.yml @@ -0,0 +1,52 @@ +name: run tests on mac + +on: + push: + pull_request: + schedule: + - cron: '0 3 * * *' + +jobs: + regular_check: + runs-on: macos-latest + if: github.event_name != 'schedule' + steps: + - name: Checkout + uses: actions/checkout@main + - name: Setup Python environment + uses: actions/setup-python@v2 + with: + python-version: '3.7' + - name: Check without sanitizer + uses: ./.github/actions/build_and_check + with: + asan: false + ubsan: false + check_skip_long: false + + sanitizer_check: + runs-on: macos-latest + if: github.event_name == 'schedule' + steps: + - name: Checkout + uses: actions/checkout@main + - name: Setup Python environment + uses: actions/setup-python@v2 + with: + python-version: '3.7' + - name: Check with sanitizer + uses: ./.github/actions/build_and_check + with: + asan: true + ubsan: true + check_skip_long: true + - name: Setting job link variable + if: ${{ failure() }} + run: | + echo "job_link=${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" >> $GITHUB_ENV + - uses: alialaa/issue-action@v1 + if: ${{ failure() }} + with: + token: ${{ secrets.GITHUB_TOKEN }} + title: Scheduled CI job has failed + body: ${{ env.job_link }} diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 03340740354..3683684bf2e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -image: docker.pkg.github.com/espressomd/docker/ubuntu-20.04:e583d4b2eb8eedd10068957f952bd67008475ee5 +image: docker.pkg.github.com/espressomd/docker/ubuntu-20.04:063f945eb434f6900402fd412f28a4486288c82b stages: - prepare @@ -100,6 +100,7 @@ maxset: with_scafacos: 'true' 
with_stokesian_dynamics: 'true' check_skip_long: 'true' + cmake_params: '-DTEST_NP=8' script: - bash maintainer/CI/build_cmake.sh tags: @@ -130,6 +131,7 @@ ubuntu:wo-dependencies: variables: myconfig: 'maxset' with_cuda: 'false' + with_hdf5: 'false' make_check_unit_tests: 'false' make_check_python: 'false' script: diff --git a/CMakeLists.txt b/CMakeLists.txt index 45ec3dffa95..489aee2b2b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,6 @@ include(FeatureSummary) include(GNUInstallDirs) project(ESPResSo) include(option_enum) -include(option_if_available) if(POLICY CMP0074) # make find_package() use _ROOT variables cmake_policy(SET CMP0074 NEW) @@ -69,12 +68,12 @@ set(CMAKE_FIND_FRAMEWORK LAST) # ############################################################################## option(WITH_PYTHON "Build with Python bindings" ON) -option_if_available(WITH_GSL "Build with GSL support" ON) +option(WITH_GSL "Build with GSL support" OFF) option(WITH_CUDA "Build with GPU support" OFF) -option_if_available(WITH_HDF5 "Build with HDF5 support" ON) +option(WITH_HDF5 "Build with HDF5 support" OFF) option(WITH_TESTS "Enable tests" ON) -option_if_available(WITH_SCAFACOS "Build with ScaFaCoS support" OFF) -option_if_available(WITH_STOKESIAN_DYNAMICS "Build with Stokesian Dynamics" ON) +option(WITH_SCAFACOS "Build with ScaFaCoS support" OFF) +option(WITH_STOKESIAN_DYNAMICS "Build with Stokesian Dynamics" OFF) option(WITH_BENCHMARKS "Enable benchmarks" OFF) option(WITH_VALGRIND_INSTRUMENTATION "Build with valgrind instrumentation markers" OFF) @@ -192,7 +191,7 @@ if(WITH_HDF5) # who are not familiar with the way hdf5 is distributed in Linux package # repositories (libhdf5-dev is the serial version). 
set(HDF5_PREFER_PARALLEL 1) - find_package(HDF5 "1.8" COMPONENTS C) + find_package(HDF5 "1.8" REQUIRED COMPONENTS C) if(HDF5_FOUND) if(HDF5_IS_PARALLEL) set(H5MD 1) @@ -200,17 +199,8 @@ if(WITH_HDF5) else() unset(H5MD) set(HDF5_FOUND FALSE) - if(NOT WITH_HDF5_IS_DEFAULT_VALUE) - message( - FATAL_ERROR - "Optional dependency HDF5 explicitly requested, but parallel version not found." - ) - endif() + message(FATAL_ERROR "HDF5 parallel version not found.") endif(HDF5_IS_PARALLEL) - elseif(NOT WITH_HDF5_IS_DEFAULT_VALUE) - message( - FATAL_ERROR - "Optional dependency HDF5 explicitly requested, but not found.") endif(HDF5_FOUND) endif(WITH_HDF5) @@ -231,50 +221,24 @@ if(WITH_HDF5 AND EXISTS "${CMAKE_SOURCE_DIR}/.git") endif() if(WITH_SCAFACOS) - find_package(PkgConfig) - pkg_check_modules(SCAFACOS scafacos) + find_package(PkgConfig REQUIRED) + pkg_check_modules(SCAFACOS scafacos REQUIRED) if(SCAFACOS_FOUND) set(SCAFACOS 1) - elseif(NOT WITH_SCAFACOS_IS_DEFAULT_VALUE) - message( - FATAL_ERROR - "Optional dependency ScaFaCoS explicitly requested, but not found.") endif(SCAFACOS_FOUND) endif(WITH_SCAFACOS) if(WITH_GSL) + find_package(GSL REQUIRED) +else() find_package(GSL) - if(GSL_FOUND) - set(GSL 1) - elseif(NOT WITH_GSL_IS_DEFAULT_VALUE) - message( - FATAL_ERROR "Optional dependency GSL explicitly requested, but not found." - ) - endif(GSL_FOUND) endif(WITH_GSL) -find_package(BLAS) -if(BLAS_FOUND) - set(BLAS 1) -endif() -find_package(LAPACK) -if(LAPACK_FOUND) - set(LAPACK 1) -endif() +if(GSL_FOUND) + set(GSL 1) +endif(GSL_FOUND) if(WITH_STOKESIAN_DYNAMICS) - if(BLAS AND LAPACK) - set(STOKESIAN_DYNAMICS 1) - endif() - if(NOT STOKESIAN_DYNAMICS AND NOT WITH_STOKESIAN_DYNAMICS_IS_DEFAULT_VALUE) - message( - FATAL_ERROR - "Optional feature Stokesian Dynamics explicitly requested, but dependencies not found." 
- ) - endif() -endif(WITH_STOKESIAN_DYNAMICS) - -if(STOKESIAN_DYNAMICS) set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/${PYTHON_INSTDIR}/espressomd") include(FetchContent) @@ -283,16 +247,17 @@ if(STOKESIAN_DYNAMICS) GIT_REPOSITORY https://github.com/hmenke/espresso-stokesian-dynamics.git GIT_TAG c14e57655e929) FetchContent_GetProperties(stokesian_dynamics) + set(STOKESIAN_DYNAMICS 1) if(NOT stokesian_dynamics_POPULATED) FetchContent_Populate(stokesian_dynamics) add_subdirectory(${stokesian_dynamics_SOURCE_DIR} ${stokesian_dynamics_BINARY_DIR}) endif() -endif(STOKESIAN_DYNAMICS) +endif(WITH_STOKESIAN_DYNAMICS) if(WITH_VALGRIND_INSTRUMENTATION) - find_package(PkgConfig) - pkg_check_modules(VALGRIND valgrind) + find_package(PkgConfig REQUIRED) + pkg_check_modules(VALGRIND valgrind REQUIRED) if(VALGRIND_FOUND) set(VALGRIND_INSTRUMENTATION 1) message(STATUS ${VALGRIND_INCLUDE_DIRS}) @@ -306,9 +271,9 @@ endif(WITH_VALGRIND_INSTRUMENTATION) find_package(MPI 3.0 REQUIRED) -# ############################################################################## +# # Boost -# ############################################################################## +# list(APPEND BOOST_COMPONENTS mpi serialization filesystem system) @@ -423,6 +388,10 @@ target_compile_options( set(CMAKE_MACOSX_RPATH TRUE) +# +# Sanitizers +# + if(WITH_ASAN AND WITH_MSAN) message( FATAL_ERROR @@ -448,14 +417,18 @@ endif() target_link_libraries(cxx_interface INTERFACE coverage_interface) # -# Testing -# ############################################################################## +# Static analysis +# if(WITH_CLANG_TIDY) find_package(ClangTidy "${CMAKE_CXX_COMPILER_VERSION}" EXACT REQUIRED) set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=--cuda-host-only") endif() +# +# Testing +# + if(WITH_TESTS) enable_testing() add_custom_target(check) @@ -480,6 +453,7 @@ endif(WITH_BENCHMARKS) add_subdirectory(doc) add_subdirectory(src) add_subdirectory(libs) + # # Feature summary # diff --git 
a/cmake/FindCUDACompilerClang.cmake b/cmake/FindCUDACompilerClang.cmake index 79df6fad7d0..ea8714d99a0 100644 --- a/cmake/FindCUDACompilerClang.cmake +++ b/cmake/FindCUDACompilerClang.cmake @@ -77,7 +77,6 @@ function(find_gpu_library) endif() endfunction(find_gpu_library) -find_gpu_library(VARNAME CUDA_LIBRARY NAMES cuda REQUIRED) find_gpu_library(VARNAME CUDART_LIBRARY NAMES cudart REQUIRED) find_gpu_library(VARNAME CUFFT_LIBRARY NAMES cufft REQUIRED) diff --git a/cmake/FindCUDACompilerNVCC.cmake b/cmake/FindCUDACompilerNVCC.cmake index 930518e3dcb..62674892ec1 100644 --- a/cmake/FindCUDACompilerNVCC.cmake +++ b/cmake/FindCUDACompilerNVCC.cmake @@ -71,7 +71,6 @@ function(find_gpu_library) endif() endfunction(find_gpu_library) -find_gpu_library(VARNAME CUDA_LIBRARY NAMES cuda REQUIRED) find_gpu_library(VARNAME CUDART_LIBRARY NAMES cudart REQUIRED) find_gpu_library(VARNAME CUDA_CUFFT_LIBRARIES NAMES cufft REQUIRED) diff --git a/cmake/option_if_available.cmake b/cmake/option_if_available.cmake deleted file mode 100644 index f8864511d05..00000000000 --- a/cmake/option_if_available.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2020 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - -# Like `option()`, but create an extra boolean variable to store whether the -# option was set to its default value or to a user-provided value. 
With this -# command, the project can be installed with optional dependencies without -# the need to provide a list of CMake flags. Unavailable dependencies will be -# silently ignored. However, if the user specifically requested an optional -# dependency by passing the corresponding CMake flag, the build system has -# the possibility to throw an error if the dependency is unavailable. -# -# Note that when calling CMake again without clearing the build folder, -# variables from the previous CMake call are loaded in memory. For example, -# if the user passed a value to an `option_if_available()` the first time but -# not the second time, the variable will still be flagged as a user-provided -# value in the second CMake call. -macro(option_if_available varname help_text default_value) - if(NOT DEFINED ${varname}_IS_DEFAULT_VALUE) - if("${${varname}}" STREQUAL "") - set(${varname}_IS_DEFAULT_VALUE TRUE CACHE INTERNAL "does ${varname} contain the default value?") - else() - set(${varname}_IS_DEFAULT_VALUE FALSE CACHE INTERNAL "does ${varname} contain the default value?") - endif() - endif() - option(${varname} ${help_text} ${default_value}) -endmacro() diff --git a/doc/sphinx/analysis.rst b/doc/sphinx/analysis.rst index d15f3cf7707..a346007fd20 100644 --- a/doc/sphinx/analysis.rst +++ b/doc/sphinx/analysis.rst @@ -411,10 +411,21 @@ or bin edges for the axes. Example:: density_profile.min_y, density_profile.max_y]) plt.show() +Observables based on cylindrical coordinates are also available. +They require special parameters if the cylindrical coordinate system is non-standard, e.g. if you want the origin of the cylindrical coordinates to be at a special location of the box or if you want to make use of symmetries along an axis that is not parallel to the z-axis. +For this purpose, use :class:`espressomd.math.CylindricalTransformationParameters` to create a consistent set of the parameters needed. 
Example:: + + import espressomd.math + + # shifted and rotated cylindrical coordinates + cyl_transform_params = espressomd.math.CylindricalTransformationParameters( + center=[5.0, 5.0, 0.0], axis=[0, 1, 0], orientation=[0, 0, 1]) + # histogram in cylindrical coordinates density_profile = espressomd.observables.CylindricalDensityProfile( - ids=[0, 1], center=[5.0, 5.0, 0.0], axis=[0, 0, 1], - n_r_bins=8, min_r=0.0, max_r=4.0, + ids=[0, 1], + transform_params = cyl_transform_params, + n_r_bins=8, min_r=1.0, max_r=4.0, n_phi_bins=16, min_phi=-np.pi, max_phi=np.pi, n_z_bins=4, min_z=4.0, max_z=8.0) obs_data = density_profile.calculate() @@ -779,4 +790,3 @@ Note that the cluster objects do not contain copies of the particles, but refer - diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in index aa3f9fd2d61..d85f456e91e 100644 --- a/doc/sphinx/conf.py.in +++ b/doc/sphinx/conf.py.in @@ -115,6 +115,9 @@ pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True +# sphinxcontrib.bibtex options +bibtex_bibfiles = ['zrefs.bib'] + # -- Options for HTML output ---------------------------------------------- diff --git a/doc/sphinx/electrostatics.rst b/doc/sphinx/electrostatics.rst index d39e7db170e..7c6061ab50c 100644 --- a/doc/sphinx/electrostatics.rst +++ b/doc/sphinx/electrostatics.rst @@ -234,7 +234,7 @@ using it. Electrostatic Layer Correction (ELC) ------------------------------------ -:class:`espressomd.electrostatic_extensions.ELC` +:class:`espressomd.electrostatics.ELC` *ELC* is an extension of the P3M electrostatics solver for explicit 2D periodic systems. 
It can account for different dielectric jumps on both sides of the @@ -260,8 +260,9 @@ Usage notes: *ELC* is an |es| actor and is used with:: - import espressomd.electrostatic_extensions - elc = electrostatic_extensions.ELC(gap_size=box_l * 0.2, maxPWerror=1e-3) + import espressomd.electrostatics + p3m = espressomd.electrostatics.P3M(prefactor=1, accuracy=1e-4) + elc = espressomd.electrostatics.ELC(p3m_actor=p3m, gap_size=box_l * 0.2, maxPWerror=1e-3) system.actors.add(elc) *ELC* can also be used to simulate 2D periodic systems with image charges, @@ -273,8 +274,8 @@ simulation region (*middle*) to *bottom* (at :math:`z=0`) and from *middle* to are :math:`\Delta_t=\frac{\varepsilon_m-\varepsilon_t}{\varepsilon_m+\varepsilon_t}` and :math:`\Delta_b=\frac{\varepsilon_m-\varepsilon_b}{\varepsilon_m+\varepsilon_b}`:: - elc = electrostatic_extensions.ELC(gap_size=box_l * 0.2, maxPWerror=1e-3, - delta_mid_top=0.9, delta_mid_bot=0.1) + elc = espressomd.electrostatics.ELC(p3m_actor=p3m, gap_size=box_l * 0.2, maxPWerror=1e-3, + delta_mid_top=0.9, delta_mid_bot=0.1) The fully metallic case :math:`\Delta_t=\Delta_b=-1` would lead to divergence of the forces/energies in *ELC* and is therefore only possible with the @@ -283,8 +284,8 @@ of the forces/energies in *ELC* and is therefore only possible with the Toggle ``const_pot`` on to maintain a constant electric potential difference ``pot_diff`` between the xy-planes at :math:`z=0` and :math:`z = L_z - h`:: - elc = electrostatic_extensions.ELC(gap_size=box_l * 0.2, maxPWerror=1e-3, - const_pot=True, delta_mid_bot=100.0) + elc = espressomd.electrostatics.ELC(p3m_actor=p3m, gap_size=box_l * 0.2, maxPWerror=1e-3, + const_pot=True, delta_mid_bot=100.0) This is done by countering the total dipole moment of the system with the electric field :math:`E_{\textrm{induced}}` and superposing a homogeneous diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst index 3d4949fe3f1..799894bc8a0 100644 --- 
a/doc/sphinx/installation.rst +++ b/doc/sphinx/installation.rst @@ -86,8 +86,8 @@ are required: .. code-block:: bash - sudo apt install python3-matplotlib python3-scipy ipython3 jupyter-notebook - pip3 install --user 'pint>=0.9' 'jupyter_contrib_nbextensions==0.5.1' \ + sudo apt install python3-matplotlib python3-scipy python3-pint ipython3 jupyter-notebook + pip3 install --user 'jupyter_contrib_nbextensions==0.5.1' \ 'sphinx>=1.6.7,!=2.1.0,!=3.0.0' 'sphinxcontrib-bibtex>=0.3.5' jupyter contrib nbextension install --user jupyter nbextension enable rubberband/main diff --git a/doc/sphinx/system_setup.rst b/doc/sphinx/system_setup.rst index 59b070115c5..52fa8be2f12 100644 --- a/doc/sphinx/system_setup.rst +++ b/doc/sphinx/system_setup.rst @@ -231,16 +231,33 @@ For more information please check :class:`espressomd.cuda_init.CudaInitHandle`. List available CUDA devices ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you want to list available CUDA devices -you should access :attr:`espressomd.cuda_init.CudaInitHandle.device_list`, e.g., :: +If you want to list available CUDA devices, you should call +:meth:`espressomd.cuda_init.CudaInitHandle.list_devices`:: - system = espressomd.System(box_l=[1, 1, 1]) - - print(system.cuda_init_handle.device_list) + >>> import espressomd + >>> system = espressomd.System(box_l=[1, 1, 1]) + >>> print(system.cuda_init_handle.list_devices()) + {0: 'GeForce RTX 2080', 1: 'GeForce GT 730'} -This attribute is read only and will return a dictionary containing +This method returns a dictionary containing the device id as key and the device name as its value. 
+To get more details on the CUDA devices for each MPI node, call +:meth:`espressomd.cuda_init.CudaInitHandle.list_devices_properties`:: + + >>> import pprint + >>> import espressomd + >>> system = espressomd.System(box_l=[1, 1, 1]) + >>> pprint.pprint(system.cuda_init_handle.list_devices_properties()) + {'seraue': {0: {'name': 'GeForce RTX 2080', + 'compute_capability': (7, 5), + 'cores': 46, + 'total_memory': 8370061312}, + 1: {'name': 'GeForce GT 730', + 'compute_capability': (3, 5), + 'cores': 2, + 'total_memory': 1014104064}}} + .. _Selection of CUDA device: Selection of CUDA device @@ -250,9 +267,9 @@ When you start ``pypresso`` your first GPU should be selected. If you wanted to use the second GPU, this can be done by setting :attr:`espressomd.cuda_init.CudaInitHandle.device` as follows:: - system = espressomd.System(box_l=[1, 1, 1]) - - system.cuda_init_handle.device = 1 + >>> import espressomd + >>> system = espressomd.System(box_l=[1, 1, 1]) + >>> system.cuda_init_handle.device = 1 Setting a device id outside the valid range or a device which does not meet the minimum requirements will raise diff --git a/doc/tutorials/charged_system/charged_system-1.ipynb b/doc/tutorials/charged_system/charged_system-1.ipynb index d743b8731db..7f223216e8e 100644 --- a/doc/tutorials/charged_system/charged_system-1.ipynb +++ b/doc/tutorials/charged_system/charged_system-1.ipynb @@ -36,7 +36,7 @@ "import espressomd\n", "espressomd.assert_features(['WCA', 'ELECTROSTATICS'])\n", "\n", - "from espressomd import System, interactions, electrostatics, observables, accumulators\n", + "from espressomd import System, interactions, electrostatics, observables, accumulators, math\n", "\n", "import numpy as np\n", "from scipy import optimize\n", @@ -438,20 +438,18 @@ "```python\n", "def setup_profile_calculation(system, delta_N, ion_types, r_min, n_radial_bins):\n", " radial_profile_accumulators = {}\n", + " ctp = math.CylindricalTransformationParameters(center = np.array(system.box_l) / 
2.,\n", + " axis = [0, 0, 1],\n", + " orientation = [1, 0, 0])\n", " for ion_type in ion_types:\n", " ion_ids = system.part.select(type=ion_type).id\n", " radial_profile_obs = observables.CylindricalDensityProfile(\n", " ids=ion_ids,\n", - " center=np.array(system.box_l) / 2.,\n", - " axis=[0, 0, 1, ],\n", + " transform_params = ctp,\n", " n_r_bins=n_radial_bins,\n", - " n_phi_bins=1,\n", - " n_z_bins=1,\n", " min_r=r_min,\n", - " min_phi=-np.pi,\n", " min_z=-system.box_l[2] / 2.,\n", " max_r=system.box_l[0] / 2.,\n", - " max_phi=np.pi,\n", " max_z=system.box_l[2] / 2.)\n", "\n", " bin_edges = radial_profile_obs.bin_edges()\n", @@ -945,7 +943,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/doc/tutorials/constant_pH/constant_pH.ipynb b/doc/tutorials/constant_pH/constant_pH.ipynb index 5fce1d8475c..f535722405a 100644 --- a/doc/tutorials/constant_pH/constant_pH.ipynb +++ b/doc/tutorials/constant_pH/constant_pH.ipynb @@ -117,7 +117,10 @@ "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", + "import setuptools\n", "import pint # module for working with units and dimensions\n", + "assert setuptools.version.pkg_resources.packaging.specifiers.SpecifierSet('>=0.10.1').contains(pint.__version__), \\\n", + " f'pint version {pint.__version__} is too old: several numpy operations can cast away the unit'\n", "\n", "import espressomd\n", "espressomd.assert_features(['WCA', 'ELECTROSTATICS'])\n", diff --git a/maintainer/CI/build_cmake.sh b/maintainer/CI/build_cmake.sh index bdd690b296d..75083e666c3 100755 --- a/maintainer/CI/build_cmake.sh +++ b/maintainer/CI/build_cmake.sh @@ -96,6 +96,7 @@ set_default_value with_cuda false set_default_value with_cuda_compiler "nvcc" set_default_value build_type "RelWithAssert" set_default_value with_ccache false +set_default_value with_hdf5 true set_default_value with_scafacos false set_default_value 
with_stokesian_dynamics false set_default_value test_timeout 300 @@ -121,11 +122,19 @@ cmake_params="${cmake_params} -DTEST_TIMEOUT=${test_timeout}" if [ "${with_ccache}" = true ]; then cmake_params="${cmake_params} -DWITH_CCACHE=ON" fi + +if [ "${with_hdf5}" = true ]; then + cmake_params="${cmake_params} -DWITH_HDF5=ON" +else + cmake_params="${cmake_params} -DWITH_HDF5=OFF" +fi + if [ "${with_scafacos}" = true ]; then cmake_params="${cmake_params} -DWITH_SCAFACOS=ON" else cmake_params="${cmake_params} -DWITH_SCAFACOS=OFF" fi + if [ "${with_stokesian_dynamics}" = true ]; then cmake_params="${cmake_params} -DWITH_STOKESIAN_DYNAMICS=ON" else @@ -228,7 +237,7 @@ if [ "${run_checks}" = true ]; then # fail if built with CUDA but no compatible GPU was found if [ "${with_cuda}" = true ] && [ "${hide_gpu}" != true ]; then - ./pypresso -c "import espressomd;assert espressomd.gpu_available(), 'No GPU available'" || exit 1 + ./pypresso -c "import espressomd.cuda_init as gpu;gpu.CudaInitHandle().device = 0" || exit 1 fi # unit tests diff --git a/maintainer/benchmarks/CMakeLists.txt b/maintainer/benchmarks/CMakeLists.txt index 5a6c5907652..6d2fc5e4228 100644 --- a/maintainer/benchmarks/CMakeLists.txt +++ b/maintainer/benchmarks/CMakeLists.txt @@ -1,5 +1,5 @@ include(ProcessorCount) -processorcount(NP) +ProcessorCount(NP) if(EXISTS ${MPIEXEC}) # OpenMPI 3.0 and higher checks the number of processes against the number of diff --git a/maintainer/configs/maxset.hpp b/maintainer/configs/maxset.hpp index 74368803f10..a0171117848 100644 --- a/maintainer/configs/maxset.hpp +++ b/maintainer/configs/maxset.hpp @@ -47,6 +47,7 @@ #define LB_BOUNDARIES_GPU #define ELECTROKINETICS #define EK_BOUNDARIES +#define EK_DEBUG #define MMM1D_GPU #endif diff --git a/maintainer/format/autopep8.sh b/maintainer/format/autopep8.sh index 3e2ca8ab8ac..d05e8756a5a 100755 --- a/maintainer/format/autopep8.sh +++ b/maintainer/format/autopep8.sh @@ -17,8 +17,8 @@ # along with this program. If not, see . 
-AUTOPEP8_VER=1.3.4 -PYCODESTYLE_VER=2.3.1 +AUTOPEP8_VER=1.5 +PYCODESTYLE_VER=2.5.0 python3 -m autopep8 --help 2>&1 > /dev/null if [ "$?" = "0" ]; then diff --git a/maintainer/format/cmake-format.sh b/maintainer/format/cmake-format.sh index 6c3e69f5f29..d1dab5cb365 100755 --- a/maintainer/format/cmake-format.sh +++ b/maintainer/format/cmake-format.sh @@ -16,7 +16,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -CMAKE_FORMAT_VER=0.6.9 +CMAKE_FORMAT_VER=0.6.11 python3 -m cmake_format 2>&1 > /dev/null if [ "$?" = "0" ]; then CMAKE_FORMAT="python3 -m cmake_format" diff --git a/requirements.txt b/requirements.txt index 86d78b471d9..be665e5cb59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,8 +3,8 @@ numpy>=1.14.0 h5py>=2.7.1 # optional scientific packages scipy>=0.19.0 -MDAnalysis>=0.18 -pint>=0.9 +MDAnalysis>=1.0.0 +pint>=0.10.1 # optional packages for graphics and external devices matplotlib>=2.1.1 vtk>=7.1.1 @@ -23,12 +23,12 @@ sphinxcontrib-bibtex>=0.3.5 # jupyter dependencies jupyter_contrib_nbextensions==0.5.1 # pep8 and its dependencies -autopep8==1.3.4 -pycodestyle==2.3.1 +autopep8==1.5.0 +pycodestyle==2.5.0 # pylint and its dependencies -pylint>=2.2.2 -astroid>=2.1.0 +pylint>=2.4.4 +astroid>=2.3.3 isort>=4.3.4 setuptools>=39.0.1 pre-commit>=2.2.0 -cmake-format==0.6.9 +cmake-format==0.6.11 diff --git a/samples/lb_profile.py b/samples/lb_profile.py index 7f4f6235dd6..06e6e05f516 100644 --- a/samples/lb_profile.py +++ b/samples/lb_profile.py @@ -32,6 +32,7 @@ import espressomd.shapes import espressomd.lbboundaries import espressomd.accumulators +import espressomd.math system = espressomd.System(box_l=[10.0, 10.0, 5.0]) system.time_step = 0.01 @@ -42,16 +43,14 @@ agrid=1.0, dens=1.0, visc=1.0, tau=0.01, ext_force_density=[0, 0, 0.15], kT=1.0, seed=32) system.actors.add(lb_fluid) system.thermostat.set_lb(LB_fluid=lb_fluid, seed=23) -fluid_obs = 
espressomd.observables.CylindricalLBVelocityProfile( +ctp = espressomd.math.CylindricalTransformationParameters( center=[5.0, 5.0, 0.0], axis=[0, 0, 1], + orientation=[1, 0, 0]) +fluid_obs = espressomd.observables.CylindricalLBVelocityProfile( + transform_params=ctp, n_r_bins=100, - n_phi_bins=1, - n_z_bins=1, - min_r=0.0, max_r=4.0, - min_phi=-np.pi, - max_phi=np.pi, min_z=0.0, max_z=10.0, sampling_density=0.1) diff --git a/samples/load_checkpoint.py b/samples/load_checkpoint.py index f00107ce4f4..807768e3d3d 100644 --- a/samples/load_checkpoint.py +++ b/samples/load_checkpoint.py @@ -30,7 +30,6 @@ espressomd.assert_features(required_features) from espressomd import checkpointing -import numpy as np checkpoint = checkpointing.Checkpoint(checkpoint_id="mycheckpoint") checkpoint.load() @@ -77,10 +76,7 @@ checkpoint.get_registered_objects())) -# integrate system and finally save checkpoint -print("\n### Integrate until user presses ctrl+c ###") +# integrate system print("Integrating...") -np.random.seed(seed=42) -while True: - system.integrator.run(1000) +system.integrator.run(1000) diff --git a/samples/save_checkpoint.py b/samples/save_checkpoint.py index 462945805b0..5c51c57faf4 100644 --- a/samples/save_checkpoint.py +++ b/samples/save_checkpoint.py @@ -43,7 +43,7 @@ # test for user data myvar = "some script variable" checkpoint.register("myvar") -myvar = "updated value" # demo of how the register function works +myvar = myvar + " (updated value)" # demo of how the register function works # test for "system" box_l = 10.7437 diff --git a/samples/visualization_elc.py b/samples/visualization_elc.py index c8c67c73aea..950bc8b3708 100644 --- a/samples/visualization_elc.py +++ b/samples/visualization_elc.py @@ -27,7 +27,6 @@ import espressomd import espressomd.shapes from espressomd import electrostatics -from espressomd import electrostatic_extensions from espressomd import visualization required_features = ["P3M", "WCA"] @@ -75,11 +74,8 @@ 
system.thermostat.set_langevin(kT=0.1, gamma=1.0, seed=42) p3m = electrostatics.P3M(prefactor=1.0, accuracy=1e-2) - -system.actors.add(p3m) - -elc = electrostatic_extensions.ELC(maxPWerror=1.0, gap_size=elc_gap, - const_pot=True, pot_diff=potential_diff) +elc = electrostatics.ELC(p3m_actor=p3m, maxPWerror=1.0, gap_size=elc_gap, + const_pot=True, pot_diff=potential_diff) system.actors.add(elc) visualizer.run(1) diff --git a/src/config/features.def b/src/config/features.def index 6a203625f25..73c6cd3a7d7 100644 --- a/src/config/features.def +++ b/src/config/features.def @@ -23,7 +23,6 @@ COLLISION_DETECTION NPT ENGINE implies ROTATION, EXTERNAL_FORCES PARTICLE_ANISOTROPY implies ROTATION -STOKESIAN_DYNAMICS requires BLAS and LAPACK STOKESIAN_DYNAMICS implies ROTATION /* Rotation */ @@ -102,6 +101,4 @@ FFTW external H5MD external SCAFACOS external GSL external -BLAS external -LAPACK external STOKESIAN_DYNAMICS external diff --git a/src/core/CellStructure.hpp b/src/core/CellStructure.hpp index b3688a956fe..c8c091fc323 100644 --- a/src/core/CellStructure.hpp +++ b/src/core/CellStructure.hpp @@ -459,22 +459,21 @@ struct CellStructure { public: /** - * @brief Set the particle decomposition to - * AtomDecomposition. + * @brief Set the particle decomposition to AtomDecomposition. * - * @param comm Communicator to use. - * @param box Box Geometry + * @param comm Communicator to use. + * @param box Box Geometry */ void set_atom_decomposition(boost::mpi::communicator const &comm, BoxGeometry const &box); /** - * @brief Set the particle decomposition to - * DomainDecomposition. + * @brief Set the particle decomposition to DomainDecomposition. * - * @param comm Cartesian communicator to use. - * @param box Box Geometry - * @param local_geo Geometry of the local box. + * @param comm Cartesian communicator to use. + * @param range Interaction range. + * @param box Box Geometry + * @param local_geo Geometry of the local box. 
*/ void set_domain_decomposition(boost::mpi::communicator const &comm, double range, BoxGeometry const &box, diff --git a/src/core/DomainDecomposition.hpp b/src/core/DomainDecomposition.hpp index 0c6fe81252e..747f19af010 100644 --- a/src/core/DomainDecomposition.hpp +++ b/src/core/DomainDecomposition.hpp @@ -66,12 +66,10 @@ * */ struct DomainDecomposition : public ParticleDecomposition { - /** Grind dimensions per node. */ + /** Grid dimensions per node. */ Utils::Vector3i cell_grid = {}; - /** cell size. */ + /** Cell size. */ Utils::Vector3d cell_size = {}; - -private: /** Offset in global grid */ Utils::Vector3i cell_offset = {}; /** linked cell grid with ghost frame. */ @@ -120,7 +118,7 @@ struct DomainDecomposition : public ParticleDecomposition { } private: - /** Fill local_cells list and ghost_cells list for use with domain + /** Fill @c m_local_cells list and @c m_ghost_cells list for use with domain * decomposition. */ void mark_cells(); @@ -128,8 +126,8 @@ struct DomainDecomposition : public ParticleDecomposition { /** Fill a communication cell pointer list. Fill the cell pointers of * all cells which are inside a rectangular subgrid of the 3D cell * grid starting from the - * lower left corner lc up to the high top corner hc. The cell - * pointer list part_lists must already be large enough. + * lower left corner @p lc up to the high top corner @p hc. The cell + * pointer list @p part_lists must already be large enough. * \param part_lists List of cell pointers to store the result. * \param lc lower left corner of the subgrid. * \param hc high up corner of the subgrid. @@ -159,10 +157,10 @@ struct DomainDecomposition : public ParticleDecomposition { /** * @brief Split particle list by direction. * - * Moves all particles from src into left - * and right depending if they belong to - * the left or right side from local node - * in direction dir. 
+ * Moves all particles from @p src into @p left + * or @p right depending on whether they belong + * to the left or right side of the local node + * in direction @p dir. * * @param src Particles to sort. * @param left Particles that should go to the left @@ -185,36 +183,36 @@ struct DomainDecomposition : public ParticleDecomposition { * @brief Calculate cell grid dimensions, cell sizes and number of cells. * * Calculates the cell grid, based on the local box size and the range. - * If the number of cells is larger than max_num_cells, - * it increases max_range until the number of cells is - * smaller or equal max_num_cells. It sets: - * cell_grid, - * ghost_cell_grid, - * cell_size, and - * inv_cell_size. + * If the number of cells is larger than @c max_num_cells, + * it increases @c max_range until the number of cells is + * smaller or equal to @c max_num_cells. It sets: + * @c cell_grid, + * @c ghost_cell_grid, + * @c cell_size, and + * @c inv_cell_size. * - * @param range Required interacting range. All pairs closer - * than this distance are found. + * @param range interaction range. All pairs closer + * than this distance are found. */ void create_cell_grid(double range); /** Init cell interactions for cell system domain decomposition. * Initializes the interacting neighbor cell list of a cell. * This list of interacting neighbor cells is used by the Verlet - * algorithm. + * algorithm. */ void init_cell_interactions(); - /** Create communicators for cell structure domain decomposition. (see \ref - * GhostCommunicator) + /** Create communicators for cell structure domain decomposition (see \ref + * GhostCommunicator). */ GhostCommunicator prepare_comm(); /** Maximal number of cells per node. In order to avoid memory - * problems due to the cell grid one has to specify the maximal + * problems due to the cell grid, one has to specify the maximal * number of cells. If the number of cells is larger - * than max_num_cells the cell grid is reduced. 
- * max_num_cells has to be larger than 27, e.g. one inner cell. + * than @c max_num_cells, the cell grid is reduced. + * @c max_num_cells has to be larger than 27, e.g. one inner cell. */ static constexpr int max_num_cells = 32768; }; diff --git a/src/core/EspressoSystemInterface_cuda.cu b/src/core/EspressoSystemInterface_cuda.cu index adeb11a1491..81948ad920b 100644 --- a/src/core/EspressoSystemInterface_cuda.cu +++ b/src/core/EspressoSystemInterface_cuda.cu @@ -19,7 +19,7 @@ #include "EspressoSystemInterface.hpp" #include "cuda_interface.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "errorhandling.hpp" #include diff --git a/src/core/Particle.hpp b/src/core/Particle.hpp index 82bfe54cdc3..10ee2833641 100644 --- a/src/core/Particle.hpp +++ b/src/core/Particle.hpp @@ -39,23 +39,36 @@ enum : uint8_t { }; #ifdef EXTERNAL_FORCES -/** - * \ref ParticleProperties::ext_flag "ext_flag" value for fixed coordinate +/** \ref ParticleProperties::ext_flag "ext_flag" value for fixed coordinate * @c coord. */ #define COORD_FIXED(coord) (2u << (coord)) /** \ref ParticleProperties::ext_flag "ext_flag" mask to check whether any of - * the coordinates is fixed. */ + * the coordinates is fixed. + */ #define COORDS_FIX_MASK (COORD_FIXED(0) | COORD_FIXED(1) | COORD_FIXED(2)) -#else +#else // EXTERNAL_FORCES #define COORD_FIXED(coord) (0) -#endif +#endif // EXTERNAL_FORCES +/** Properties of a self-propelled particle. */ struct ParticleParametersSwimming { + /** Is the particle a swimmer. */ bool swimming = false; + /** Constant velocity to relax to. */ double f_swim = 0.; + /** Imposed constant force. */ double v_swim = 0.; + /** Flag for the swimming mode in a LB fluid. + * Values: + * - -1: pusher + * - +1: puller + * - 0: no swimming + */ int push_pull = 0; + /** Distance of the source of propulsion from the particle + * center in a LB fluid. 
+ */ double dipole_length = 0.; template void serialize(Archive &ar, long int /* version */) { @@ -82,7 +95,7 @@ struct ParticleProperties { double mass = 1.0; #else constexpr static double mass{1.0}; -#endif /* MASS */ +#endif /** rotational inertia */ #ifdef ROTATIONAL_INERTIA @@ -141,48 +154,46 @@ struct ParticleProperties { ar &quat; } } vs_relative; -#endif -#else /* VIRTUAL_SITES */ +#endif // VIRTUAL_SITES_RELATIVE +#else // VIRTUAL_SITES static constexpr bool is_virtual = false; -#endif /* VIRTUAL_SITES */ +#endif // VIRTUAL_SITES #ifdef THERMOSTAT_PER_PARTICLE +/** Friction coefficient for translation */ #ifndef PARTICLE_ANISOTROPY double gamma = -1.; #else Utils::Vector3d gamma = {-1., -1., -1.}; #endif // PARTICLE_ANISOTROPY -/** Friction coefficient gamma for rotation */ #ifdef ROTATION +/** Friction coefficient for rotation */ #ifndef PARTICLE_ANISOTROPY double gamma_rot = -1.; #else Utils::Vector3d gamma_rot = {-1., -1., -1.}; -#endif // ROTATIONAL_INERTIA +#endif // PARTICLE_ANISOTROPY #endif // ROTATION #endif // THERMOSTAT_PER_PARTICLE #ifdef EXTERNAL_FORCES - /** flag whether to fix a particle in space. - Values: -
  • 0 no external influence -
  • 1 apply external force \ref ParticleProperties::ext_force -
  • 2,3,4 fix particle coordinate 0,1,2 -
  • 5 apply external torque \ref ParticleProperties::ext_torque -
- */ + /** Flag for fixed particle coordinates. + * Values: + * - 0: no fixed coordinates + * - 2: fix translation along the x axis + * - 4: fix translation along the y axis + * - 8: fix translation along the z axis + */ uint8_t ext_flag = 0; - /** External force, apply if \ref ParticleProperties::ext_flag == 1. */ + /** External force. */ Utils::Vector3d ext_force = {0, 0, 0}; - #ifdef ROTATION - /** External torque, apply if \ref ParticleProperties::ext_flag == 16. */ + /** External torque. */ Utils::Vector3d ext_torque = {0, 0, 0}; #endif -#else - static constexpr const uint8_t ext_flag = - 0; // no external forces and fixed coordinates -#endif +#else // EXTERNAL_FORCES + static constexpr const uint8_t ext_flag = 0; // no fixed coordinates +#endif // EXTERNAL_FORCES #ifdef ENGINE ParticleParametersSwimming swim; @@ -194,7 +205,7 @@ struct ParticleProperties { ar &type; #ifdef MASS ar &mass; -#endif /* MASS */ +#endif #ifdef ROTATIONAL_INERTIA ar &rinertia; #endif @@ -217,7 +228,7 @@ struct ParticleProperties { #ifdef VIRTUAL_SITES_RELATIVE ar &vs_relative; #endif -#endif /* VIRTUAL_SITES */ +#endif // VIRTUAL_SITES #ifdef THERMOSTAT_PER_PARTICLE ar γ @@ -231,7 +242,7 @@ struct ParticleProperties { #ifdef ROTATION ar &ext_torque; #endif -#endif +#endif // EXTERNAL_FORCES #ifdef ENGINE ar &swim; @@ -256,7 +267,7 @@ struct ParticlePosition { #endif #ifdef BOND_CONSTRAINT - /** particle position at the previous time step */ + /** particle position at the previous time step (RATTLE algorithm) */ Utils::Vector3d p_old = {0., 0., 0.}; #endif @@ -277,6 +288,7 @@ struct ParticlePosition { struct ParticleForce { ParticleForce() = default; ParticleForce(ParticleForce const &) = default; + ParticleForce &operator=(ParticleForce const &) = default; ParticleForce(const Utils::Vector3d &f) : f(f) {} #ifdef ROTATION ParticleForce(const Utils::Vector3d &f, const Utils::Vector3d &torque) @@ -300,7 +312,7 @@ struct ParticleForce { Utils::Vector3d f = {0., 0., 0.}; #ifdef ROTATION 
- /** torque */ + /** torque. */ Utils::Vector3d torque = {0., 0., 0.}; #endif @@ -313,15 +325,17 @@ struct ParticleForce { }; /** Momentum information on a particle. Information not contained in - communication of ghost particles so far, but a communication would - be necessary for velocity dependent potentials. */ + * communication of ghost particles so far, but a communication would + * be necessary for velocity-dependent potentials. + */ struct ParticleMomentum { /** velocity. */ Utils::Vector3d v = {0., 0., 0.}; #ifdef ROTATION - /** angular velocity - ALWAYS IN PARTICLE FIXED, I.E., CO-ROTATING COORDINATE SYSTEM */ + /** angular velocity. + * ALWAYS IN PARTICLE FIXED, I.E., CO-ROTATING COORDINATE SYSTEM. + */ Utils::Vector3d omega = {0., 0., 0.}; #endif @@ -334,10 +348,10 @@ struct ParticleMomentum { }; /** Information on a particle that is needed only on the - * node the particle belongs to + * node the particle belongs to. */ struct ParticleLocal { - /** check whether a particle is a ghost or not */ + /** is particle a ghost particle. */ bool ghost = false; /** position in the last time step before last Verlet list update. */ Utils::Vector3d p_old = {0, 0, 0}; @@ -387,10 +401,9 @@ struct Particle { // NOLINT(bugprone-exception-escape) private: #ifdef EXCLUSIONS - /** list of particles, with which this particle has no nonbonded + /** list of particles, with which this particle has no non-bonded * interactions */ - std::vector el; #endif diff --git a/src/core/RuntimeErrorStream.cpp b/src/core/RuntimeErrorStream.cpp index 654d5d39781..8bd24eb8c4f 100644 --- a/src/core/RuntimeErrorStream.cpp +++ b/src/core/RuntimeErrorStream.cpp @@ -22,8 +22,7 @@ #include namespace ErrorHandling { -/** ostringstream is not copyable, but it is fine here to copy just the content. - */ +// ostringstream is not copyable, but it is fine here to copy just the content. 
RuntimeErrorStream::RuntimeErrorStream(const RuntimeErrorStream &rhs) : m_ec(rhs.m_ec), m_line(rhs.m_line), m_file(rhs.m_file), m_function(rhs.m_function) { diff --git a/src/core/accumulators/Correlator.cpp b/src/core/accumulators/Correlator.cpp index a8a5d4b24c1..7345eddd134 100644 --- a/src/core/accumulators/Correlator.cpp +++ b/src/core/accumulators/Correlator.cpp @@ -418,8 +418,6 @@ void Correlator::update() { } } } - - m_last_update = sim_time; } int Correlator::finalize() { @@ -543,7 +541,6 @@ std::string Correlator::get_internal_state() const { oa << A_accumulated_average; oa << B_accumulated_average; oa << n_data; - oa << m_last_update; return ss.str(); } @@ -565,7 +562,6 @@ void Correlator::set_internal_state(std::string const &state) { ia >> A_accumulated_average; ia >> B_accumulated_average; ia >> n_data; - ia >> m_last_update; } } // namespace Accumulators diff --git a/src/core/accumulators/Correlator.hpp b/src/core/accumulators/Correlator.hpp index 5fcf9f46c27..bd25147798c 100644 --- a/src/core/accumulators/Correlator.hpp +++ b/src/core/accumulators/Correlator.hpp @@ -201,7 +201,6 @@ class Correlator : public AccumulatorBase { int tau_lin() const { return m_tau_lin; } double tau_max() const { return m_tau_max; } - double last_update() const { return m_last_update; } double dt() const { return m_dt; } Utils::Vector3d const &correlation_args() const { return m_correlation_args; } @@ -259,8 +258,6 @@ class Correlator : public AccumulatorBase { std::vector B_accumulated_average; ///< all B values are added up here size_t n_data; ///< a counter for calculated averages and variances - double m_last_update; - size_t dim_A; ///< dimensionality of A size_t dim_B; ///< dimensionality of B std::vector m_shape; ///< dimensionality of the correlation diff --git a/src/core/actor/DipolarBarnesHut.hpp b/src/core/actor/DipolarBarnesHut.hpp index 443a13a06a1..b010ebe3804 100644 --- a/src/core/actor/DipolarBarnesHut.hpp +++ b/src/core/actor/DipolarBarnesHut.hpp @@ -27,6 
+27,7 @@ #include "DipolarBarnesHut_cuda.cuh" #include "SystemInterface.hpp" #include "cuda_interface.hpp" +#include "cuda_utils.hpp" #include "electrostatics_magnetostatics/dipole.hpp" #include "errorhandling.hpp" @@ -38,7 +39,7 @@ typedef float dds_float; class DipolarBarnesHut : public Actor { public: DipolarBarnesHut(SystemInterface &s, float epssq, float itolsq) { - k = static_cast(dipole.prefactor); + m_k = static_cast(dipole.prefactor); m_epssq = epssq; m_itolsq = itolsq; setBHPrecision(&m_epssq, &m_itolsq); @@ -53,7 +54,12 @@ class DipolarBarnesHut : public Actor { }; void computeForces(SystemInterface &s) override { - allocBHmemCopy(static_cast(s.npart_gpu()), &m_bh_data); + try { + allocBHmemCopy(static_cast(s.npart_gpu()), &m_bh_data); + } catch (cuda_runtime_error const &err) { + runtimeErrorMsg() << "DipolarBarnesHut: " << err.what(); + return; + } fillConstantPointers(s.rGpuBegin(), s.dipGpuBegin(), m_bh_data); initBHgpu(m_bh_data.blocks); @@ -61,12 +67,17 @@ class DipolarBarnesHut : public Actor { buildTreeBH(m_bh_data.blocks); summarizeBH(m_bh_data.blocks); sortBH(m_bh_data.blocks); - if (forceBH(&m_bh_data, k, s.fGpuBegin(), s.torqueGpuBegin())) { + if (forceBH(&m_bh_data, m_k, s.fGpuBegin(), s.torqueGpuBegin())) { runtimeErrorMsg() << "kernels encountered a functional error"; } }; void computeEnergy(SystemInterface &s) override { - allocBHmemCopy(static_cast(s.npart_gpu()), &m_bh_data); + try { + allocBHmemCopy(static_cast(s.npart_gpu()), &m_bh_data); + } catch (cuda_runtime_error const &err) { + runtimeErrorMsg() << "DipolarBarnesHut: " << err.what(); + return; + } fillConstantPointers(s.rGpuBegin(), s.dipGpuBegin(), m_bh_data); initBHgpu(m_bh_data.blocks); @@ -74,13 +85,13 @@ class DipolarBarnesHut : public Actor { buildTreeBH(m_bh_data.blocks); summarizeBH(m_bh_data.blocks); sortBH(m_bh_data.blocks); - if (energyBH(&m_bh_data, k, (&(((CUDA_energy *)s.eGpu())->dipolar)))) { + if (energyBH(&m_bh_data, m_k, (&(((CUDA_energy 
*)s.eGpu())->dipolar)))) { runtimeErrorMsg() << "kernels encountered a functional error"; } }; -protected: - float k; +private: + float m_k; float m_epssq; float m_itolsq; BHData m_bh_data = {0, 0, 0, nullptr, nullptr, diff --git a/src/core/actor/DipolarBarnesHut_cuda.cu b/src/core/actor/DipolarBarnesHut_cuda.cu index 761a21098a2..1b6b72c4c61 100644 --- a/src/core/actor/DipolarBarnesHut_cuda.cu +++ b/src/core/actor/DipolarBarnesHut_cuda.cu @@ -28,7 +28,8 @@ #include "DipolarBarnesHut_cuda.cuh" #include "cuda_init.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" +#include "errorhandling.hpp" #include #include @@ -1181,11 +1182,8 @@ void allocBHmemCopy(int nbodies, BHData *bh_data) { bh_data->nbodies = nbodies; - int devID = -1; - EspressoGpuDevice dev; - - devID = cuda_get_device(); - cuda_get_device_props(devID, dev); + auto const devID = cuda_get_device(); + EspressoGpuDevice const dev = cuda_get_device_props(devID); bh_data->blocks = dev.n_cores; // Each node corresponds to a split of the cubic box in 3D space to equal diff --git a/src/core/actor/DipolarBarnesHut_cuda.cuh b/src/core/actor/DipolarBarnesHut_cuda.cuh index 3a09682bd30..95079830c2e 100644 --- a/src/core/actor/DipolarBarnesHut_cuda.cuh +++ b/src/core/actor/DipolarBarnesHut_cuda.cuh @@ -28,94 +28,98 @@ typedef float dds_float; typedef struct { - // CUDA blocks + /// CUDA blocks int blocks; - // each node corresponds to a split of the cubic box in 3D space to equal - // cubic boxes hence, 8 octant nodes per particle is a theoretical octree - // limit: a maximal number of octree nodes is "nnodesd" and a number of - // particles "nbodiesd" respectively. + /// each node corresponds to a split of the cubic box in 3D space to equal + /// cubic boxes hence, 8 octant nodes per particle is a theoretical octree + /// limit: a maximal number of octree nodes is "nnodesd" and a number of + /// particles "nbodiesd" respectively. 
int nbodies; int nnodes; - // particle positions on the device: + /// particle positions on the device: float *r; - // particle dipole moments on the device: + /// particle dipole moments on the device: float *u; - // Not a real mass. Just a node weight coefficient. + /// Not a real mass. Just a node weight coefficient. float *mass; - // min positions' coordinates of the BH box. + /// min positions' coordinates of the Barnes-Hut box. float *minp; - // max positions' coordinates of the BH box. + /// max positions' coordinates of the Barnes-Hut box. float *maxp; - // Error report. + /// Error report. int *err; - // Indices of particles sorted according to the tree linear representation. + /// Indices of particles sorted according to the tree linear representation. int *sort; - // The tree linear representation. + /// The tree linear representation. int *child; - // Supplementary array: a tree nodes (division octant cells/particles inside) - // counting. + /// Supplementary array: a tree nodes (division octant cells/particles inside) + /// counting. int *count; - // Start indices for the per-cell sorting. + /// Start indices for the per-cell sorting. int *start; - // trace the max loops for a threads' sync + /// trace the max loops for a threads' sync int *max_lps; } BHData; -// thread count for different kernels (see kernel calls from below functions). +/// @name Barnes-Hut thread count for different kernels. +/// @{ #define THREADS1 512 #define THREADS2 1024 #define THREADS3 1024 #define THREADS4 1024 #define THREADS5 256 +/// @} -// block count = factor * #SMs -// for different kernels (see kernel calls from below functions). +/// @name Barnes-Hut block factor for different kernels. +/// block count = factor * number of blocks +/// @{ #define FACTOR1 2 #define FACTOR2 1 #define FACTOR3 1 /* must all be resident at the same time */ #define FACTOR4 1 /* must all be resident at the same time */ #define FACTOR5 4 +/// @} -// Warp size. +/// Barnes-Hut warp size. 
#define WARPSIZE 32 -// Max possible depth of the Barnes-Hut tree branching. +/// Maximal depth of the Barnes-Hut tree branching. #define MAXDEPTH 32 -// Function to set the BH method parameters. +/// Function to set the Barnes-Hut parameters. void setBHPrecision(float *epssq, float *itolsq); -// An allocation of the GPU device memory and an initialization where it is -// needed. +/// An allocation of the GPU device memory and an initialization where it is +/// needed. void allocBHmemCopy(int nbodies, BHData *bh_data); -// Populating of array pointers allocated in GPU device before. -// Copy the particle data to the Barnes-Hut related arrays. +/// Populating of array pointers allocated in GPU device before. +/// Copy the particle data to the Barnes-Hut related arrays. void fillConstantPointers(float *r, float *dip, BHData bh_data); -// Required BH CUDA init. +/// Barnes-Hut CUDA initialization. void initBHgpu(int blocks); -// Building Barnes-Hut spatial min/max position box +/// Building Barnes-Hut spatial min/max position box void buildBoxBH(int blocks); -// Building Barnes-Hut tree in a linear child array representation -// of octant cells and particles inside. +/// Building Barnes-Hut tree in a linear child array representation +/// of octant cells and particles inside. void buildTreeBH(int blocks); -// Calculate octant cells masses and cell index counts. -// Determine cells centers of mass and total dipole moments -// on all possible levels of the BH tree. +/// Calculate octant cells masses and cell index counts. +/// Determine cells centers of mass and total dipole moments +/// on all possible levels of the Barnes-Hut tree. void summarizeBH(int blocks); -// Sort particle indexes according to the BH tree representation. -// Crucial for the per-warp performance tuning of forceCalculationKernel and -// energyCalculationKernel. +/// Sort particle indexes according to the Barnes-Hut tree representation. 
+/// Crucial for the per-warp performance tuning of @c forceCalculationKernel +/// and @c energyCalculationKernel. void sortBH(int blocks); -// Force calculation. +/// Barnes-Hut force calculation. int forceBH(BHData *bh_data, dds_float k, float *f, float *torque); -// Energy calculation. +/// Barnes-Hut energy calculation. int energyBH(BHData *bh_data, dds_float k, float *E); #endif // DIPOLAR_BARNES_HUT diff --git a/src/core/actor/DipolarDirectSum_cuda.cu b/src/core/actor/DipolarDirectSum_cuda.cu index 91462241478..278bf4e9eb4 100644 --- a/src/core/actor/DipolarDirectSum_cuda.cu +++ b/src/core/actor/DipolarDirectSum_cuda.cu @@ -21,7 +21,7 @@ #ifdef DIPOLAR_DIRECT_SUM -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include #include diff --git a/src/core/actor/Mmm1dgpuForce_cuda.cu b/src/core/actor/Mmm1dgpuForce_cuda.cu index 89f81c67844..e217e652e5e 100644 --- a/src/core/actor/Mmm1dgpuForce_cuda.cu +++ b/src/core/actor/Mmm1dgpuForce_cuda.cu @@ -28,7 +28,7 @@ #include "EspressoSystemInterface.hpp" #include "actor/Mmm1dgpuForce.hpp" #include "actor/specfunc_cuda.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "electrostatics_magnetostatics/coulomb.hpp" #include "electrostatics_magnetostatics/mmm-modpsi.hpp" #include "electrostatics_magnetostatics/mmm1d.hpp" diff --git a/src/core/cuda_common_cuda.cu b/src/core/cuda_common_cuda.cu index c35f3858279..0e1bcba4556 100644 --- a/src/core/cuda_common_cuda.cu +++ b/src/core/cuda_common_cuda.cu @@ -21,7 +21,7 @@ #include "ParticleRange.hpp" #include "cuda_init.hpp" #include "cuda_interface.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "errorhandling.hpp" #include "CudaDeviceAllocator.hpp" @@ -34,6 +34,7 @@ #include #include +#include extern int this_node; @@ -66,16 +67,12 @@ CUDA_energy energy_host; pinned_vector particle_torques_host; -/**cuda streams for parallel computing on cpu and gpu */ cudaStream_t stream[1]; -cudaError_t _err; -cudaError_t CU_err; - -void 
_cuda_check_errors(const dim3 &block, const dim3 &grid, - const char *function, const char *file, - unsigned int line) { - CU_err = cudaGetLastError(); +void cuda_check_errors_exit(const dim3 &block, const dim3 &grid, + const char *function, const char *file, + unsigned int line) { + cudaError_t CU_err = cudaGetLastError(); if (CU_err != cudaSuccess) { fprintf(stderr, "%d: error \"%s\" calling %s with dim %d %d %d, grid %d %d " @@ -134,25 +131,23 @@ void resize_buffers(size_t number_of_particles) { */ void gpu_init_particle_comm() { if (this_node == 0 && global_part_vars_host.communication_enabled == 0) { - if (cuda_get_n_gpus() == -1) { - runtimeErrorMsg() - << "Unable to initialize CUDA as no sufficient GPU is available."; - errexit(); - } - if (cuda_get_n_gpus() > 1) { - runtimeWarningMsg() << "More than one GPU detected, please note ESPResSo " - "uses device 0 by default regardless of usage or " - "capability. The GPU to be used can be modified " - "by setting System.cuda_init_handle.device."; - if (cuda_check_gpu(0) != ES_OK) { - runtimeWarningMsg() - << "CUDA device 0 is not capable of running ESPResSo but is used " - "by default. ESPResSo has detected a CUDA capable card but it " - "is not the one used by ESPResSo by default. Please set the " - "GPU to use by setting System.cuda_init_handle.device. 
A list " - "of available GPUs is available through " - "System.cuda_init_handle.device_list."; + try { + if (cuda_get_n_gpus() == 0) { + fprintf(stderr, "ERROR: No GPU was found.\n"); + errexit(); } + auto const devID = cuda_get_device(); + auto const compute_capability = cuda_check_gpu_compute_capability(devID); + auto const communication_test = cuda_test_device_access(); + if (compute_capability != ES_OK or communication_test != ES_OK) { + fprintf(stderr, + "ERROR: CUDA device %i is not capable of running ESPResSo.\n", + devID); + errexit(); + } + } catch (cuda_runtime_error const &err) { + fprintf(stderr, "ERROR: %s\n", err.what()); + errexit(); } } global_part_vars_host.communication_enabled = 1; @@ -230,10 +225,11 @@ CUDA_energy copy_energy_from_GPU() { return energy_host; } -void _cuda_safe_mem(cudaError_t CU_err, const char *file, unsigned int line) { - if (cudaSuccess != CU_err) { - fprintf(stderr, "Cuda Memory error at %s:%u.\n", file, line); - printf("CUDA error: %s\n", cudaGetErrorString(CU_err)); +void cuda_safe_mem_exit(cudaError_t CU_err, const char *file, + unsigned int line) { + if (CU_err != cudaSuccess) { + fprintf(stderr, "CUDA Memory error at %s:%u.\n", file, line); + fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(CU_err)); if (CU_err == cudaErrorInvalidValue) fprintf(stderr, "You may have tried to allocate zero memory at %s:%u.\n", file, line); diff --git a/src/core/cuda_init.cpp b/src/core/cuda_init.cpp index 62a9e8d5cc5..15fec94982a 100644 --- a/src/core/cuda_init.cpp +++ b/src/core/cuda_init.cpp @@ -22,8 +22,10 @@ #ifdef CUDA #include "cuda_init.hpp" +#include "cuda_utils.hpp" #include "communication.hpp" +#include "errorhandling.hpp" #include @@ -34,13 +36,13 @@ #include #include -/** Helper class force device set. +/** Helper class for device sets. 
*/ struct CompareDevices { bool operator()(const EspressoGpuDevice &a, const EspressoGpuDevice &b) const { const int name_comp = strncmp(a.proc_name, b.proc_name, 63); - /* Both devs are from the same node, order by id */ + /* if both devices are from the same node, order by id */ if (name_comp == 0) return a.id < b.id; @@ -49,49 +51,50 @@ struct CompareDevices { }; /** Gather list of CUDA devices on all nodes on the master node. - * It relies on MPI_Get_processor_name() to get a unique identifier of - * the physical node, as opposed to the logical rank of which there can - * be more than one on one node. + * It relies on MPI_Get_processor_name() to get a unique identifier + * of the physical node, as opposed to the logical rank of which there can + * be more than one per node. */ -std::vector cuda_gather_gpus() { - int n_gpus = cuda_get_n_gpus(); - char proc_name[MPI_MAX_PROCESSOR_NAME]; - int proc_name_len; +static std::vector mpi_cuda_gather_gpus_local() { /* List of local devices */ - std::vector devices; + std::vector devices_local; /* Global unique device list (only relevant on master) */ - std::vector g_devices; - int *n_gpu_array = nullptr; + std::vector devices_global; - MPI_Get_processor_name(proc_name, &proc_name_len); + int n_devices; + try { + n_devices = cuda_get_n_gpus(); + } catch (cuda_runtime_error const &err) { + n_devices = 0; + } - /* Truncate to 63 chars to fit struct. 
*/ - if (strlen(proc_name) > 63) + int proc_name_len; + char proc_name[MPI_MAX_PROCESSOR_NAME]; + MPI_Get_processor_name(proc_name, &proc_name_len); + if (std::strlen(proc_name) > 63) proc_name[63] = '\0'; - for (int i = 0; i < n_gpus; ++i) { - /* Check if device has at least minimum compute capability */ - if (cuda_check_gpu(i) == ES_OK) { - EspressoGpuDevice device; - if (cuda_get_device_props(i, device) == ES_OK) { - strncpy(device.proc_name, proc_name, 64); - device.proc_name[63] = '\0'; - device.node = this_node; - devices.push_back(device); - } + for (int i = 0; i < n_devices; ++i) { + try { + EspressoGpuDevice device = cuda_get_device_props(i); + std::strncpy(device.proc_name, proc_name, 64); + device.proc_name[63] = '\0'; + device.node = this_node; + devices_local.push_back(device); + } catch (cuda_runtime_error const &err) { + // pass } } - /* Update n_gpus to number of usable devices */ - n_gpus = devices.size(); + int const n_gpus = static_cast(devices_local.size()); if (this_node == 0) { std::set device_set; - n_gpu_array = new int[n_nodes]; + int *n_gpu_array = new int[n_nodes]; MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD); /* insert local devices */ - std::copy(devices.begin(), devices.end(), + std::copy(devices_local.begin(), devices_local.end(), std::inserter(device_set, device_set.begin())); EspressoGpuDevice device; @@ -106,18 +109,25 @@ std::vector cuda_gather_gpus() { } /* Copy unique devices to result, if any */ std::copy(device_set.begin(), device_set.end(), - std::inserter(g_devices, g_devices.begin())); + std::inserter(devices_global, devices_global.begin())); delete[] n_gpu_array; } else { /* Send number of devices to master */ - MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD); - /* Send devices to maser */ - for (auto &device : devices) { + MPI_Gather(&n_gpus, 1, MPI_INT, nullptr, 1, MPI_INT, 0, MPI_COMM_WORLD); + /* Send devices to master */ + for (auto const &device : 
devices_local) { MPI_Send(&device, sizeof(EspressoGpuDevice), MPI_BYTE, 0, 0, MPI_COMM_WORLD); } } - return g_devices; + return devices_global; +} + +REGISTER_CALLBACK_MASTER_RANK(mpi_cuda_gather_gpus_local) + +std::vector cuda_gather_gpus() { + return mpi_call(Communication::Result::master_rank, + mpi_cuda_gather_gpus_local); } #endif /* CUDA */ diff --git a/src/core/cuda_init.hpp b/src/core/cuda_init.hpp index eab42a7490f..2447c532078 100644 --- a/src/core/cuda_init.hpp +++ b/src/core/cuda_init.hpp @@ -30,20 +30,21 @@ * old datatypes, as it is intended for MPI communication. */ struct EspressoGpuDevice { - /* Local CUDA device id */ + /** Local CUDA device id */ int id; - /* Node identification */ + /** Local CUDA device name */ + char name[64]; + /** Node identification */ char proc_name[64]; - /* MPI process identification */ + /** MPI process identification */ int node; - /* Compute capability */ + /** Compute capability (major) */ int compute_capability_major; + /** Compute capability (minor) */ int compute_capability_minor; - /* Name */ - char name[64]; - /* Total Memory */ - int total_memory; - /* Number of cores */ + /** Total Memory */ + size_t total_memory; + /** Number of cores */ int n_cores; }; @@ -53,62 +54,52 @@ void cuda_init(); /** Get the number of CUDA devices. * - * @return the number of GPUs, or -1 if CUDA could not be - * initialized. The error message from CUDA can be found in \ref - * cuda_error. + * @return the number of GPUs. */ int cuda_get_n_gpus(); -/** Check that a given GPU is capable of what we need, that is, at - * least compute capability 1.1. +/** Check that a given GPU has compute capability. + * The minimal compute capability required by ESPResSo is + * \ref computeCapabilityMinMajor . \ref computeCapabilityMinMinor . * * @param dev CUDA device number - * @return \ref ES_OK if and only if the GPU with the given id is - * usable for CUDA computations. 
Only devices with compute - * capability of 1.1 or higher are ok, since atomic operations are - * required for CUDA-LB. + * @return \ref ES_OK if the GPU meets the requirements, else \ref ES_ERROR. */ -int cuda_check_gpu(int dev); +int cuda_check_gpu_compute_capability(int dev); /** Get the name of a CUDA device. * - * @param dev the CUDA device number to ask the name for - * @param name a buffer to write the name to, at least 64 characters + * @param[in] dev the CUDA device number to ask the name for + * @param[out] name a buffer to write the name to, at least 64 characters */ void cuda_get_gpu_name(int dev, char name[64]); /** Choose a device for future CUDA computations. * * @param dev the device to use - * @return \ref ES_OK on success, \ref ES_ERROR else. The error - * message from CUDA can be found in \ref cuda_error. */ -int cuda_set_device(int dev); +void cuda_set_device(int dev); /** Get the current CUDA device. * - * @return the current device's number or -1 if an error occurred. The error - * message from CUDA can be found in \ref cuda_error. + * @return the current device's number. */ int cuda_get_device(); /** Test if actual CUDA device works. * @return \ref ES_OK on success, \ref ES_ERROR else. - * The error message from CUDA can be found in \ref cuda_error. */ int cuda_test_device_access(); -/** Gather unique list of CUDA devices on all nodes - * @return vector of device on master, empty vector on other nodes. +/** Gather unique list of CUDA devices on all nodes. + * @return vector of device properties. */ std::vector cuda_gather_gpus(); /** Get properties of a CUDA device + * @param dev CUDA device number */ -int cuda_get_device_props(int dev, EspressoGpuDevice &d); - -/** Current error message of CUDA. 
*/ -extern const char *cuda_error; +EspressoGpuDevice cuda_get_device_props(int dev); #endif // ifdef CUDA #endif diff --git a/src/core/cuda_init_cuda.cu b/src/core/cuda_init_cuda.cu index 31292cd2d39..e434d7132f0 100644 --- a/src/core/cuda_init_cuda.cu +++ b/src/core/cuda_init_cuda.cu @@ -20,10 +20,12 @@ #include #include "cuda_init.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include +#include + #if defined(OMPI_MPI_H) || defined(_MPI_H) #error CU-file includes mpi.h! This should not happen! #endif @@ -36,32 +38,20 @@ static const int computeCapabilityMinMajor = 3; static const int computeCapabilityMinMinor = 0; /**@}*/ -const char *cuda_error; - -void cuda_init() { cudaStreamCreate(&stream[0]); } +void cuda_init() { CUDA_CHECK(cudaStreamCreate(&stream[0])) } -/// get the number of CUDA devices. int cuda_get_n_gpus() { int deviceCount; - cudaError_t error = cudaGetDeviceCount(&deviceCount); - if (error != cudaSuccess) { - cuda_error = cudaGetErrorString(error); - return -1; - } + CUDA_CHECK(cudaGetDeviceCount(&deviceCount)) return deviceCount; } -int cuda_check_gpu(int dev) { +int cuda_check_gpu_compute_capability(int dev) { cudaDeviceProp deviceProp; - cudaError_t error = cudaGetDeviceProperties(&deviceProp, dev); - if (error != cudaSuccess) { - cuda_error = cudaGetErrorString(error); - return ES_ERROR; - } + CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, dev)) if (deviceProp.major < computeCapabilityMinMajor || (deviceProp.major == computeCapabilityMinMajor && deviceProp.minor < computeCapabilityMinMinor)) { - cuda_error = "compute capability insufficient"; return ES_ERROR; } return ES_OK; @@ -69,53 +59,36 @@ int cuda_check_gpu(int dev) { void cuda_get_gpu_name(int dev, char name[64]) { cudaDeviceProp deviceProp; - cudaError_t error = cudaGetDeviceProperties(&deviceProp, dev); - if (error != cudaSuccess) { - cuda_error = cudaGetErrorString(error); - strncpy(name, "no GPU", 63); - } else { - strncpy(name, deviceProp.name, 63); - } + 
CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, dev)) + std::strncpy(name, deviceProp.name, 63); name[63] = 0; } -int cuda_get_device_props(const int dev, EspressoGpuDevice &d) { +EspressoGpuDevice cuda_get_device_props(const int dev) { cudaDeviceProp deviceProp; - cudaError_t error = cudaGetDeviceProperties(&deviceProp, dev); - if (error != cudaSuccess) { - cuda_error = cudaGetErrorString(error); - return ES_ERROR; - } - strncpy(d.name, deviceProp.name, 64); - d.id = dev; - d.total_memory = deviceProp.totalGlobalMem; - d.compute_capability_major = deviceProp.major; - d.compute_capability_minor = deviceProp.minor; - d.n_cores = deviceProp.multiProcessorCount; - - return ES_OK; + CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, dev)) + EspressoGpuDevice device{dev, + "", + "", + -1, + deviceProp.major, + deviceProp.minor, + deviceProp.totalGlobalMem, + deviceProp.multiProcessorCount}; + std::strncpy(device.name, deviceProp.name, 64); + device.name[63] = '\0'; + return device; } -int cuda_set_device(int dev) { - cudaSetDevice(dev); - cudaStreamDestroy(stream[0]); - cudaError_t error = cudaStreamCreate(&stream[0]); - - if (error != cudaSuccess) { - cuda_error = cudaGetErrorString(error); - throw std::runtime_error(cuda_error); - } - - return ES_OK; +void cuda_set_device(int dev) { + CUDA_CHECK(cudaSetDevice(dev)) + CUDA_CHECK(cudaStreamDestroy(stream[0])) + CUDA_CHECK(cudaStreamCreate(&stream[0])) } int cuda_get_device() { int dev; - cudaError_t error = cudaGetDevice(&dev); - if (error != cudaSuccess) { - cuda_error = cudaGetErrorString(error); - return -1; - } + CUDA_CHECK(cudaGetDevice(&dev)) return dev; } @@ -126,23 +99,23 @@ int cuda_test_device_access() { err = cudaMalloc((void **)&d, sizeof(int)); if (err != cudaSuccess) { - cuda_error = cudaGetErrorString(err); - return ES_ERROR; + throw cuda_runtime_error_cuda(err); } err = cudaMemcpy(d, &h, sizeof(int), cudaMemcpyHostToDevice); if (err != cudaSuccess) { - cuda_error = cudaGetErrorString(err); - return 
ES_ERROR; + cudaFree(d); + throw cuda_runtime_error_cuda(err); } h = 0; err = cudaMemcpy(&h, d, sizeof(int), cudaMemcpyDeviceToHost); cudaFree(d); - - if ((h == 42) && (err == cudaSuccess)) { - return ES_OK; + if (err != cudaSuccess) { + throw cuda_runtime_error_cuda(err); } - cuda_error = cudaGetErrorString(err); - return ES_ERROR; + if (h != 42) { + return ES_ERROR; + } + return ES_OK; } #endif /* defined(CUDA) */ diff --git a/src/core/cuda_interface.cpp b/src/core/cuda_interface.cpp index 3b6c07a8a27..a954a4e9ac8 100644 --- a/src/core/cuda_interface.cpp +++ b/src/core/cuda_interface.cpp @@ -96,7 +96,7 @@ void cuda_mpi_get_particles( /* pack local parts into buffer */ pack_particles(particles, buffer.data()); - Utils::Mpi::gather_buffer(buffer.data(), buffer.size(), comm_cart); + Utils::Mpi::gather_buffer(buffer, comm_cart); } else { particle_data_host.resize(n_part); diff --git a/src/core/cuda_interface.hpp b/src/core/cuda_interface.hpp index 6c518f649a2..0896d1bf703 100644 --- a/src/core/cuda_interface.hpp +++ b/src/core/cuda_interface.hpp @@ -57,7 +57,6 @@ struct CUDA_particle_data { /** particle position given from md part*/ Vector3f p; -#if defined(CUDA) /** particle id */ int identity; #ifdef VIRTUAL_SITES @@ -68,7 +67,6 @@ struct CUDA_particle_data { /** particle momentum struct velocity p.m->v*/ Vector3f v; -#endif #ifdef ROTATION Vector3f director; diff --git a/src/core/cuda_utils.cuh b/src/core/cuda_utils.cuh new file mode 100644 index 00000000000..5eec836394b --- /dev/null +++ b/src/core/cuda_utils.cuh @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2013-2019 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef _CUDA_UTILS_CUH +#define _CUDA_UTILS_CUH + +#if !defined(__CUDACC__) +#error Do not include CUDA headers in normal C++-code!!! +#endif + +#include "cuda_utils.hpp" + +#include + +#include +#include + +class cuda_runtime_error_cuda : public cuda_runtime_error { +public: + cuda_runtime_error_cuda(cudaError_t error) + : cuda_runtime_error(error_message(error)) {} + +private: + std::string error_message(cudaError_t error) { + const char *cuda_error = cudaGetErrorString(error); + return std::string("CUDA error: ") + cuda_error; + } +}; + +/** Convert CUDA error codes into runtime errors. */ +#define CUDA_CHECK(statement) \ + { \ + cudaError_t const error_code = (statement); \ + if (error_code != cudaSuccess) { \ + throw cuda_runtime_error_cuda(error_code); \ + } \ + } + +/** CUDA streams for parallel computing on CPU and GPU */ +extern cudaStream_t stream[1]; + +/** In case of error during CUDA memory allocation and memory copy, print + * the error message and exit. + * @param CU_err cuda error code + * @param file .cu file were the error took place + * @param line line of the file were the error took place + */ +void cuda_safe_mem_exit(cudaError_t CU_err, const char *file, + unsigned int line); + +/** In case of error during a CUDA operation, print the error message and exit. + */ +void cuda_check_errors_exit(const dim3 &block, const dim3 &grid, + const char *function, const char *file, + unsigned int line); + +#define cuda_safe_mem(a) cuda_safe_mem_exit((a), __FILE__, __LINE__) + +/** Calculate @c dim_grid for CUDA kernel calls. 
*/ +inline dim3 calculate_dim_grid(unsigned const threads_x, + unsigned const blocks_per_grid_y, + unsigned const threads_per_block) { + assert(threads_x >= 1); + assert(blocks_per_grid_y >= 1); + assert(threads_per_block >= 1); + auto const threads_y = threads_per_block * blocks_per_grid_y; + auto const blocks_per_grid_x = (threads_x + threads_y - 1) / threads_y; + return make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); +} + +#define KERNELCALL_shared(_function, _grid, _block, _stream, ...) \ + _function<<<_grid, _block, _stream, stream[0]>>>(__VA_ARGS__); \ + cuda_check_errors_exit(_grid, _block, #_function, __FILE__, __LINE__); + +#define KERNELCALL_stream(_function, _grid, _block, _stream, ...) \ + _function<<<_grid, _block, 0, _stream>>>(__VA_ARGS__); \ + cuda_check_errors_exit(_grid, _block, #_function, __FILE__, __LINE__); + +#define KERNELCALL(_function, _grid, _block, ...) \ + KERNELCALL_shared(_function, _grid, _block, 0, ##__VA_ARGS__) + +#endif diff --git a/src/core/cuda_utils.hpp b/src/core/cuda_utils.hpp index 30cde8f791e..faf49dbb472 100644 --- a/src/core/cuda_utils.hpp +++ b/src/core/cuda_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 The ESPResSo project + * Copyright (C) 2021 The ESPResSo project * * This file is part of ESPResSo. * @@ -19,35 +19,17 @@ #ifndef _CUDA_UTILS_HPP #define _CUDA_UTILS_HPP -#if !defined(__CUDACC__) -#error Do not include CUDA headers in normal C++-code!!! 
-#endif - -/** cuda streams for parallel computing on cpu and gpu */ -extern cudaStream_t stream[1]; - -/** Error output for memory allocation and memory copy - * @param err cuda error code - * @param file .cu file were the error took place - * @param line line of the file were the error took place - */ -void _cuda_safe_mem(cudaError_t err, const char *file, unsigned int line); - -void _cuda_check_errors(const dim3 &block, const dim3 &grid, - const char *function, const char *file, - unsigned int line); - -#define cuda_safe_mem(a) _cuda_safe_mem((a), __FILE__, __LINE__) +#include "config.hpp" -#define KERNELCALL_shared(_function, _grid, _block, _stream, ...) \ - _function<<<_grid, _block, _stream, stream[0]>>>(__VA_ARGS__); \ - _cuda_check_errors(_grid, _block, #_function, __FILE__, __LINE__); +#ifdef CUDA -#define KERNELCALL_stream(_function, _grid, _block, _stream, ...) \ - _function<<<_grid, _block, 0, _stream>>>(__VA_ARGS__); \ - _cuda_check_errors(_grid, _block, #_function, __FILE__, __LINE__); +#include +#include -#define KERNELCALL(_function, _grid, _block, ...) 
\ - KERNELCALL_shared(_function, _grid, _block, 0, ##__VA_ARGS__) +class cuda_runtime_error : public std::runtime_error { +public: + cuda_runtime_error(std::string const &msg) : std::runtime_error(msg) {} +}; +#endif // CUDA #endif diff --git a/src/core/dpd.cpp b/src/core/dpd.cpp index c79743126fe..657f00c965e 100644 --- a/src/core/dpd.cpp +++ b/src/core/dpd.cpp @@ -92,17 +92,6 @@ void dpd_init() { } } -void dpd_update_params(double pref_scale) { - for (int type_a = 0; type_a < max_seen_particle_type; type_a++) { - for (int type_b = 0; type_b < max_seen_particle_type; type_b++) { - IA_parameters &ia_params = *get_ia_param(type_a, type_b); - - ia_params.dpd_radial.pref *= pref_scale; - ia_params.dpd_trans.pref *= pref_scale; - } - } -} - static double weight(int type, double r_cut, double k, double r) { if (type == 0) { return 1.; diff --git a/src/core/dpd.hpp b/src/core/dpd.hpp index da7c4191dc2..0d8d8caa311 100644 --- a/src/core/dpd.hpp +++ b/src/core/dpd.hpp @@ -38,7 +38,6 @@ struct IA_parameters; int dpd_set_params(int part_type_a, int part_type_b, double gamma, double k, double r_c, int wf, double tgamma, double tr_c, int twf); void dpd_init(); -void dpd_update_params(double pref2_scale); Utils::Vector3d dpd_pair_force(Particle const &p1, Particle const &p2, IA_parameters const &ia_params, diff --git a/src/core/electrostatics_magnetostatics/coulomb.cpp b/src/core/electrostatics_magnetostatics/coulomb.cpp index 8ffbe210253..ce0990eb659 100644 --- a/src/core/electrostatics_magnetostatics/coulomb.cpp +++ b/src/core/electrostatics_magnetostatics/coulomb.cpp @@ -158,8 +158,8 @@ void deactivate() { } void update_dependent_particles() { - iccp3m_iteration(cell_structure.local_particles(), - cell_structure.ghost_particles()); + icc_iteration(cell_structure.local_particles(), + cell_structure.ghost_particles()); } void on_observable_calc() { @@ -183,8 +183,13 @@ void on_coulomb_change() { #ifdef P3M #ifdef CUDA case COULOMB_P3M_GPU: - if (this_node == 0) - 
p3m_gpu_init(p3m.params.cao, p3m.params.mesh, p3m.params.alpha); + if (this_node == 0) { + try { + p3m_gpu_init(p3m.params.cao, p3m.params.mesh, p3m.params.alpha); + } catch (std::runtime_error const &err) { + runtimeErrorMsg() << err.what(); + } + } break; #endif case COULOMB_ELC_P3M: @@ -359,23 +364,23 @@ double calc_energy_long_range(const ParticleRange &particles) { return energy; } -int iccp3m_sanity_check() { +int icc_sanity_check() { switch (coulomb.method) { #ifdef P3M case COULOMB_ELC_P3M: { if (elc_params.dielectric_contrast_on) { - runtimeErrorMsg() << "ICCP3M conflicts with ELC dielectric contrast"; + runtimeErrorMsg() << "ICC conflicts with ELC dielectric contrast"; return 1; } break; } #endif case COULOMB_DH: { - runtimeErrorMsg() << "ICCP3M does not work with Debye-Hueckel."; + runtimeErrorMsg() << "ICC does not work with Debye-Hueckel."; return 1; } case COULOMB_RF: { - runtimeErrorMsg() << "ICCP3M does not work with COULOMB_RF."; + runtimeErrorMsg() << "ICC does not work with COULOMB_RF."; return 1; } default: @@ -384,7 +389,7 @@ int iccp3m_sanity_check() { #ifdef NPT if (integ_switch == INTEG_METHOD_NPT_ISO) { - runtimeErrorMsg() << "ICCP3M does not work in the NPT ensemble"; + runtimeErrorMsg() << "ICC does not work in the NPT ensemble"; return 1; } #endif diff --git a/src/core/electrostatics_magnetostatics/coulomb.hpp b/src/core/electrostatics_magnetostatics/coulomb.hpp index c6cc7015fa9..b0d4f78244c 100644 --- a/src/core/electrostatics_magnetostatics/coulomb.hpp +++ b/src/core/electrostatics_magnetostatics/coulomb.hpp @@ -75,7 +75,7 @@ void calc_long_range_force(const ParticleRange &particles); double calc_energy_long_range(const ParticleRange &particles); -int iccp3m_sanity_check(); +int icc_sanity_check(); int elc_sanity_check(); diff --git a/src/core/electrostatics_magnetostatics/elc.cpp b/src/core/electrostatics_magnetostatics/elc.cpp index ac44fbe4437..41ded584289 100644 --- a/src/core/electrostatics_magnetostatics/elc.cpp +++ 
b/src/core/electrostatics_magnetostatics/elc.cpp @@ -45,6 +45,7 @@ #include +#include #include #include #include @@ -92,6 +93,9 @@ ELC_struct elc_params = {1e100, 10, 1, 0, true, true, false, 1, #define PQECCM 7 /**@}*/ +/** ELC axes (x and y directions)*/ +enum class PoQ : int { P, Q }; + /** temporary buffers for product decomposition */ static std::vector partblk; /** collected data from the other cells */ @@ -112,27 +116,7 @@ static std::vector scycache; * LOCAL FUNCTIONS ****************************************/ -static void distribute(int size); -/** \name q=0 per frequency code */ -/**@{*/ -static void setup_P(int p, double omega, const ParticleRange &particles); -static void add_P_force(const ParticleRange &particles); -static double P_energy(double omega, int n_part); -/**@}*/ -/** \name p=0 per frequency code */ -/**@{*/ -static void setup_Q(int q, double omega, const ParticleRange &particles); -static void add_Q_force(const ParticleRange &particles); -static double Q_energy(double omega, int n_part); -/**@}*/ -/** \name p,q <> 0 per frequency code */ -/**@{*/ -static void setup_PQ(int p, int q, double omega, - const ParticleRange &particles); -static void add_PQ_force(int p, int q, double omega, - const ParticleRange &particles); -static double PQ_energy(double omega, int n_part); -/**@}*/ +static void distribute(std::size_t size); static void add_dipole_force(const ParticleRange &particles); static double dipole_energy(const ParticleRange &particles); static double z_energy(const ParticleRange &particles); @@ -147,7 +131,7 @@ void ELC_setup_constants() { } /** - * @brief Calculated cached sin/cos values for one direction. + * @brief Calculate cached sin/cos values for one direction. * * @tparam dir Index of the dimension to consider (e.g. 0 for x ...). * @@ -157,13 +141,13 @@ void ELC_setup_constants() { * @return Calculated values. 
*/ template -static std::vector sc_cache(const ParticleRange &particles, int n_freq, - double u) { +static std::vector calc_sc_cache(const ParticleRange &particles, + std::size_t n_freq, double u) { constexpr double c_2pi = 2 * Utils::pi(); auto const n_part = particles.size(); std::vector ret(n_freq * n_part); - for (size_t freq = 1; freq <= n_freq; freq++) { + for (std::size_t freq = 1; freq <= n_freq; freq++) { auto const pref = c_2pi * u * static_cast(freq); size_t o = (freq - 1) * n_part; @@ -176,51 +160,54 @@ static std::vector sc_cache(const ParticleRange &particles, int n_freq, return ret; } -static void prepare_sc_cache(const ParticleRange &particles, int n_freq_x, - double u_x, int n_freq_y, double u_y) { - scxcache = sc_cache<0>(particles, n_freq_x, u_x); - scycache = sc_cache<1>(particles, n_freq_y, u_y); +static void prepare_sc_cache(const ParticleRange &particles, + std::size_t n_freq_x, double u_x, + std::size_t n_freq_y, double u_y) { + scxcache = calc_sc_cache<0>(particles, n_freq_x, u_x); + scycache = calc_sc_cache<1>(particles, n_freq_y, u_y); } /*****************************************************************/ /* data distribution */ /*****************************************************************/ -inline void clear_vec(double *pdc, int size) { - for (int i = 0; i < size; i++) +inline void clear_vec(double *pdc, std::size_t size) { + for (std::size_t i = 0; i < size; i++) pdc[i] = 0; } -inline void copy_vec(double *pdc_d, double const *pdc_s, int size) { - for (int i = 0; i < size; i++) +inline void copy_vec(double *pdc_d, double const *pdc_s, std::size_t size) { + for (std::size_t i = 0; i < size; i++) pdc_d[i] = pdc_s[i]; } inline void add_vec(double *pdc_d, double const *pdc_s1, double const *pdc_s2, - int size) { - for (int i = 0; i < size; i++) + std::size_t size) { + for (std::size_t i = 0; i < size; i++) pdc_d[i] = pdc_s1[i] + pdc_s2[i]; } inline void addscale_vec(double *pdc_d, double scale, double const *pdc_s1, - double const 
*pdc_s2, int size) { - for (int i = 0; i < size; i++) + double const *pdc_s2, std::size_t size) { + for (std::size_t i = 0; i < size; i++) pdc_d[i] = scale * pdc_s1[i] + pdc_s2[i]; } -inline void scale_vec(double scale, double *pdc, int size) { - for (int i = 0; i < size; i++) +inline void scale_vec(double scale, double *pdc, std::size_t size) { + for (std::size_t i = 0; i < size; i++) pdc[i] *= scale; } -inline double *block(double *p, int index, int size) { +inline double *block(double *p, std::size_t index, std::size_t size) { return &p[index * size]; } -void distribute(int size) { +void distribute(std::size_t size) { + assert(size <= 8); double send_buf[8]; copy_vec(send_buf, gblcblk, size); - MPI_Allreduce(send_buf, gblcblk, size, MPI_DOUBLE, MPI_SUM, comm_cart); + MPI_Allreduce(send_buf, gblcblk, static_cast(size), MPI_DOUBLE, MPI_SUM, + comm_cart); } /** Checks if a charged particle is in the forbidden gap region @@ -246,7 +233,7 @@ inline void check_gap_elc(const Particle &p) { */ static void add_dipole_force(const ParticleRange &particles) { double const pref = coulomb.prefactor * 4 * Utils::pi() * ux * uy * uz; - int const size = 3; + constexpr std::size_t size = 3; auto local_particles = particles; @@ -311,7 +298,7 @@ static void add_dipole_force(const ParticleRange &particles) { */ static double dipole_energy(const ParticleRange &particles) { double const pref = coulomb.prefactor * 2 * Utils::pi() * ux * uy * uz; - int const size = 7; + constexpr std::size_t size = 7; /* for nonneutral systems, this shift gives the background contribution (rsp. 
for this shift, the DM of the background is zero) */ double const shift = 0.5 * box_geo.length()[2]; @@ -354,32 +341,32 @@ static double dipole_energy(const ParticleRange &particles) { distribute(size); // Yeh + Berkowitz term @cite yeh99a - double eng = 2 * pref * (Utils::sqr(gblcblk[2]) + gblcblk[2] * gblcblk[3]); + double energy = 2 * pref * (Utils::sqr(gblcblk[2]) + gblcblk[2] * gblcblk[3]); if (!elc_params.neutralize) { // SUBTRACT the energy of the P3M homogeneous neutralizing background - eng += 2 * pref * - (-gblcblk[0] * gblcblk[4] - - (.25 - .5 / 3.) * Utils::sqr(gblcblk[0] * box_geo.length()[2])); + energy += 2 * pref * + (-gblcblk[0] * gblcblk[4] - + (.25 - .5 / 3.) * Utils::sqr(gblcblk[0] * box_geo.length()[2])); } if (elc_params.dielectric_contrast_on) { if (elc_params.const_pot) { // zero potential difference contribution - eng += pref * height_inverse / uz * Utils::sqr(gblcblk[6]); + energy += pref * height_inverse / uz * Utils::sqr(gblcblk[6]); // external potential shift contribution - eng -= 2 * elc_params.pot_diff * height_inverse * gblcblk[6]; + energy -= 2 * elc_params.pot_diff * height_inverse * gblcblk[6]; } /* counter the P3M homogeneous background contribution to the boundaries. We never need that, since a homogeneous background spanning the artificial boundary layers is aphysical. */ - eng += pref * (-(gblcblk[1] * gblcblk[4] + gblcblk[0] * gblcblk[5]) - - (1. - 2. / 3.) * gblcblk[0] * gblcblk[1] * - Utils::sqr(box_geo.length()[2])); + energy += pref * (-(gblcblk[1] * gblcblk[4] + gblcblk[0] * gblcblk[5]) - + (1. - 2. / 3.) * gblcblk[0] * gblcblk[1] * + Utils::sqr(box_geo.length()[2])); } - return this_node == 0 ? eng : 0; + return this_node == 0 ? 
energy : 0; } /*****************************************************************/ @@ -405,7 +392,7 @@ inline double image_sum_t(double q, double z) { /*****************************************************************/ static double z_energy(const ParticleRange &particles) { double const pref = coulomb.prefactor * 2 * Utils::pi() * ux * uy; - int const size = 4; + constexpr std::size_t size = 4; /* for nonneutral systems, this shift gives the background contribution (rsp. for this shift, the DM of the background is zero) */ @@ -473,20 +460,20 @@ static double z_energy(const ParticleRange &particles) { } distribute(size); - double eng = 0; + double energy = 0; if (this_node == 0) - eng -= pref * (gblcblk[1] * gblcblk[2] - gblcblk[0] * gblcblk[3]); + energy -= gblcblk[1] * gblcblk[2] - gblcblk[0] * gblcblk[3]; - return eng; + return pref * energy; } /*****************************************************************/ static void add_z_force(const ParticleRange &particles) { double const pref = coulomb.prefactor * 2 * Utils::pi() * ux * uy; + constexpr std::size_t size = 1; if (elc_params.dielectric_contrast_on) { auto local_particles = particles; - int const size = 1; if (elc_params.const_pot) { clear_vec(gblcblk, size); /* just counter the 2 pi |z| contribution stemming from P3M */ @@ -536,10 +523,15 @@ static void add_z_force(const ParticleRange &particles) { /* PoQ exp sum */ /*****************************************************************/ -static void setup_P(int p, double omega, const ParticleRange &particles) { +/** \name q=0 or p=0 per frequency code */ +/**@{*/ +template +void setup_PoQ(std::size_t index, double omega, + const ParticleRange &particles) { + assert(index >= 1); double const pref_di = coulomb.prefactor * 4 * Utils::pi() * ux * uy; double const pref = -pref_di / expm1(omega * box_geo.length()[2]); - int const size = 4; + constexpr std::size_t size = 4; double lclimgebot[4], lclimgetop[4], lclimge[4]; double fac_delta_mid_bot = 1, 
fac_delta_mid_top = 1, fac_delta = 1; @@ -554,16 +546,17 @@ static void setup_P(int p, double omega, const ParticleRange &particles) { clear_vec(lclimge, size); clear_vec(gblcblk, size); + auto &sc_cache = (axis == PoQ::P) ? scxcache : scycache; - int ic = 0; - auto const o = static_cast((p - 1) * particles.size()); + std::size_t ic = 0; + auto const o = (index - 1) * particles.size(); for (auto &p : particles) { double e = exp(omega * p.r.p[2]); - partblk[size * ic + POQESM] = p.p.q * scxcache[o + ic].s / e; - partblk[size * ic + POQESP] = p.p.q * scxcache[o + ic].s * e; - partblk[size * ic + POQECM] = p.p.q * scxcache[o + ic].c / e; - partblk[size * ic + POQECP] = p.p.q * scxcache[o + ic].c * e; + partblk[size * ic + POQESM] = p.p.q * sc_cache[o + ic].s / e; + partblk[size * ic + POQESP] = p.p.q * sc_cache[o + ic].s * e; + partblk[size * ic + POQECM] = p.p.q * sc_cache[o + ic].c / e; + partblk[size * ic + POQECP] = p.p.q * sc_cache[o + ic].c * e; add_vec(gblcblk, gblcblk, block(partblk.data(), ic, size), size); @@ -575,10 +568,10 @@ static void setup_P(int p, double omega, const ParticleRange &particles) { double const scale = p.p.q * elc_params.delta_mid_bot; - lclimgebot[POQESM] = scxcache[o + ic].s / e; - lclimgebot[POQESP] = scxcache[o + ic].s * e; - lclimgebot[POQECM] = scxcache[o + ic].c / e; - lclimgebot[POQECP] = scxcache[o + ic].c * e; + lclimgebot[POQESM] = sc_cache[o + ic].s / e; + lclimgebot[POQESP] = sc_cache[o + ic].s * e; + lclimgebot[POQECM] = sc_cache[o + ic].c / e; + lclimgebot[POQECP] = sc_cache[o + ic].c * e; addscale_vec(gblcblk, scale, lclimgebot, gblcblk, size); @@ -595,8 +588,8 @@ static void setup_P(int p, double omega, const ParticleRange &particles) { fac_delta_mid_bot; } - lclimge[POQESP] += p.p.q * scxcache[o + ic].s * e; - lclimge[POQECP] += p.p.q * scxcache[o + ic].c * e; + lclimge[POQESP] += p.p.q * sc_cache[o + ic].s * e; + lclimge[POQECP] += p.p.q * sc_cache[o + ic].c * e; if (p.r.p[2] > (elc_params.h - elc_params.space_layer)) { 
// handle the upper case now @@ -605,14 +598,14 @@ static void setup_P(int p, double omega, const ParticleRange &particles) { double const scale = p.p.q * elc_params.delta_mid_top; - lclimgetop[POQESM] = scxcache[o + ic].s / e; - lclimgetop[POQESP] = scxcache[o + ic].s * e; - lclimgetop[POQECM] = scxcache[o + ic].c / e; - lclimgetop[POQECP] = scxcache[o + ic].c * e; + lclimgetop[POQESM] = sc_cache[o + ic].s / e; + lclimgetop[POQESP] = sc_cache[o + ic].s * e; + lclimgetop[POQECM] = sc_cache[o + ic].c / e; + lclimgetop[POQECP] = sc_cache[o + ic].c * e; addscale_vec(gblcblk, scale, lclimgetop, gblcblk, size); - e = (exp(omega * (p.r.p[2] - 4 * elc_params.h)) * + e = (exp(omega * (+p.r.p[2] - 4 * elc_params.h)) * elc_params.delta_mid_top + exp(omega * (-p.r.p[2] - 2 * elc_params.h))) * fac_delta; @@ -625,113 +618,8 @@ static void setup_P(int p, double omega, const ParticleRange &particles) { fac_delta_mid_top; } - lclimge[POQESM] += p.p.q * scxcache[o + ic].s * e; - lclimge[POQECM] += p.p.q * scxcache[o + ic].c * e; - } - - ic++; - } - - scale_vec(pref, gblcblk, size); - - if (elc_params.dielectric_contrast_on) { - scale_vec(pref_di, lclimge, size); - add_vec(gblcblk, gblcblk, lclimge, size); - } -} - -static void setup_Q(int q, double omega, const ParticleRange &particles) { - double const pref_di = coulomb.prefactor * 4 * Utils::pi() * ux * uy; - double const pref = -pref_di / expm1(omega * box_geo.length()[2]); - int const size = 4; - double lclimgebot[4], lclimgetop[4], lclimge[4]; - double fac_delta_mid_bot = 1, fac_delta_mid_top = 1, fac_delta = 1; - - if (elc_params.dielectric_contrast_on) { - double const fac_elc = - 1.0 / (1 - elc_params.delta_mid_top * elc_params.delta_mid_bot * - exp(-omega * 2 * elc_params.h)); - fac_delta_mid_bot = elc_params.delta_mid_bot * fac_elc; - fac_delta_mid_top = elc_params.delta_mid_top * fac_elc; - fac_delta = fac_delta_mid_bot * elc_params.delta_mid_top; - } - - clear_vec(lclimge, size); - clear_vec(gblcblk, size); - - int ic = 
0; - auto const o = static_cast((q - 1) * particles.size()); - for (auto &p : particles) { - double e = exp(omega * p.r.p[2]); - - partblk[size * ic + POQESM] = p.p.q * scycache[o + ic].s / e; - partblk[size * ic + POQESP] = p.p.q * scycache[o + ic].s * e; - partblk[size * ic + POQECM] = p.p.q * scycache[o + ic].c / e; - partblk[size * ic + POQECP] = p.p.q * scycache[o + ic].c * e; - - add_vec(gblcblk, gblcblk, block(partblk.data(), ic, size), size); - - if (elc_params.dielectric_contrast_on) { - if (p.r.p[2] < elc_params.space_layer) { // handle the lower case first - // negative sign before omega is okay here as the image is located - // at -p.r.p[2] - - e = exp(-omega * p.r.p[2]); - - double const scale = p.p.q * elc_params.delta_mid_bot; - - lclimgebot[POQESM] = scycache[o + ic].s / e; - lclimgebot[POQESP] = scycache[o + ic].s * e; - lclimgebot[POQECM] = scycache[o + ic].c / e; - lclimgebot[POQECP] = scycache[o + ic].c * e; - - addscale_vec(gblcblk, scale, lclimgebot, gblcblk, size); - - e = (exp(omega * (-p.r.p[2] - 2 * elc_params.h)) * - elc_params.delta_mid_bot + - exp(omega * (p.r.p[2] - 2 * elc_params.h))) * - fac_delta; - - } else { - - e = (exp(omega * (-p.r.p[2])) + - exp(omega * (p.r.p[2] - 2 * elc_params.h)) * - elc_params.delta_mid_top) * - fac_delta_mid_bot; - } - - lclimge[POQESP] += p.p.q * scycache[o + ic].s * e; - lclimge[POQECP] += p.p.q * scycache[o + ic].c * e; - - if (p.r.p[2] > (elc_params.h - - elc_params.space_layer)) { // handle the upper case now - - e = exp(omega * (2 * elc_params.h - p.r.p[2])); - - double const scale = p.p.q * elc_params.delta_mid_top; - - lclimgetop[POQESM] = scycache[o + ic].s / e; - lclimgetop[POQESP] = scycache[o + ic].s * e; - lclimgetop[POQECM] = scycache[o + ic].c / e; - lclimgetop[POQECP] = scycache[o + ic].c * e; - - addscale_vec(gblcblk, scale, lclimgetop, gblcblk, size); - - e = (exp(omega * (p.r.p[2] - 4 * elc_params.h)) * - elc_params.delta_mid_top + - exp(omega * (-p.r.p[2] - 2 * elc_params.h))) * - 
fac_delta; - - } else { - - e = (exp(omega * (p.r.p[2] - 2 * elc_params.h)) + - exp(omega * (-p.r.p[2] - 2 * elc_params.h)) * - elc_params.delta_mid_bot) * - fac_delta_mid_top; - } - - lclimge[POQESM] += p.p.q * scycache[o + ic].s * e; - lclimge[POQECM] += p.p.q * scycache[o + ic].c * e; + lclimge[POQESM] += p.p.q * sc_cache[o + ic].s * e; + lclimge[POQECM] += p.p.q * sc_cache[o + ic].c * e; } ic++; @@ -745,12 +633,13 @@ static void setup_Q(int q, double omega, const ParticleRange &particles) { } } -static void add_P_force(const ParticleRange &particles) { - int const size = 4; +template void add_PoQ_force(const ParticleRange &particles) { + constexpr auto i = static_cast(axis); + constexpr std::size_t size = 4; - int ic = 0; + std::size_t ic = 0; for (auto &p : particles) { - p.f.f[0] += partblk[size * ic + POQESM] * gblcblk[POQECP] - + p.f.f[i] += partblk[size * ic + POQESM] * gblcblk[POQECP] - partblk[size * ic + POQECM] * gblcblk[POQESP] + partblk[size * ic + POQESP] * gblcblk[POQECM] - partblk[size * ic + POQECP] * gblcblk[POQESM]; @@ -762,61 +651,34 @@ static void add_P_force(const ParticleRange &particles) { } } -static double P_energy(double omega, int n_part) { - int const size = 4; - double eng = 0; - double const pref = 1 / omega; +static double PoQ_energy(double omega, std::size_t n_part) { + constexpr std::size_t size = 4; - for (int ic = 0; ic < n_part; ic++) { - eng += pref * (partblk[size * ic + POQECM] * gblcblk[POQECP] + - partblk[size * ic + POQESM] * gblcblk[POQESP] + - partblk[size * ic + POQECP] * gblcblk[POQECM] + - partblk[size * ic + POQESP] * gblcblk[POQESM]); + double energy = 0; + for (std::size_t ic = 0; ic < n_part; ic++) { + energy += partblk[size * ic + POQECM] * gblcblk[POQECP] + + partblk[size * ic + POQESM] * gblcblk[POQESP] + + partblk[size * ic + POQECP] * gblcblk[POQECM] + + partblk[size * ic + POQESP] * gblcblk[POQESM]; } - return eng; -} - -static void add_Q_force(const ParticleRange &particles) { - int const size = 4; - - 
int ic = 0; - for (auto &p : particles) { - p.f.f[1] += partblk[size * ic + POQESM] * gblcblk[POQECP] - - partblk[size * ic + POQECM] * gblcblk[POQESP] + - partblk[size * ic + POQESP] * gblcblk[POQECM] - - partblk[size * ic + POQECP] * gblcblk[POQESM]; - p.f.f[2] += partblk[size * ic + POQECM] * gblcblk[POQECP] + - partblk[size * ic + POQESM] * gblcblk[POQESP] - - partblk[size * ic + POQECP] * gblcblk[POQECM] - - partblk[size * ic + POQESP] * gblcblk[POQESM]; - ic++; - } -} - -static double Q_energy(double omega, int n_part) { - int const size = 4; - double eng = 0; - double const pref = 1 / omega; - - for (int ic = 0; ic < n_part; ic++) { - eng += pref * (partblk[size * ic + POQECM] * gblcblk[POQECP] + - partblk[size * ic + POQESM] * gblcblk[POQESP] + - partblk[size * ic + POQECP] * gblcblk[POQECM] + - partblk[size * ic + POQESP] * gblcblk[POQESM]); - } - return eng; + return energy / omega; } +/**@}*/ /*****************************************************************/ /* PQ particle blocks */ /*****************************************************************/ -static void setup_PQ(int p, int q, double omega, +/** \name p,q <> 0 per frequency code */ +/**@{*/ +static void setup_PQ(std::size_t index_p, std::size_t index_q, double omega, const ParticleRange &particles) { + assert(index_p >= 1); + assert(index_q >= 1); double const pref_di = coulomb.prefactor * 8 * Utils::pi() * ux * uy; double const pref = -pref_di / expm1(omega * box_geo.length()[2]); - int const size = 8; + constexpr std::size_t size = 8; double lclimgebot[8], lclimgetop[8], lclimge[8]; double fac_delta_mid_bot = 1, fac_delta_mid_top = 1, fac_delta = 1; if (elc_params.dielectric_contrast_on) { @@ -831,9 +693,9 @@ static void setup_PQ(int p, int q, double omega, clear_vec(lclimge, size); clear_vec(gblcblk, size); - int ic = 0; - auto const ox = static_cast((p - 1) * particles.size()); - auto const oy = static_cast((q - 1) * particles.size()); + std::size_t ic = 0; + auto const ox = (index_p - 1) * 
particles.size(); + auto const oy = (index_q - 1) * particles.size(); for (auto const &p : particles) { double e = exp(omega * p.r.p[2]); @@ -941,14 +803,14 @@ static void setup_PQ(int p, int q, double omega, } } -static void add_PQ_force(int p, int q, double omega, +static void add_PQ_force(std::size_t index_p, std::size_t index_q, double omega, const ParticleRange &particles) { constexpr double c_2pi = 2 * Utils::pi(); - double const pref_x = c_2pi * ux * p / omega; - double const pref_y = c_2pi * uy * q / omega; - int const size = 8; + double const pref_x = c_2pi * ux * static_cast(index_p) / omega; + double const pref_y = c_2pi * uy * static_cast(index_q) / omega; + constexpr std::size_t size = 8; - int ic = 0; + std::size_t ic = 0; for (auto &p : particles) { p.f.f[0] += pref_x * (partblk[size * ic + PQESCM] * gblcblk[PQECCP] + partblk[size * ic + PQESSM] * gblcblk[PQECSP] - @@ -978,23 +840,23 @@ static void add_PQ_force(int p, int q, double omega, } } -static double PQ_energy(double omega, int n_part) { - int const size = 8; - double eng = 0; - double const pref = 1 / omega; - - for (int ic = 0; ic < n_part; ic++) { - eng += pref * (partblk[size * ic + PQECCM] * gblcblk[PQECCP] + - partblk[size * ic + PQECSM] * gblcblk[PQECSP] + - partblk[size * ic + PQESCM] * gblcblk[PQESCP] + - partblk[size * ic + PQESSM] * gblcblk[PQESSP] + - partblk[size * ic + PQECCP] * gblcblk[PQECCM] + - partblk[size * ic + PQECSP] * gblcblk[PQECSM] + - partblk[size * ic + PQESCP] * gblcblk[PQESCM] + - partblk[size * ic + PQESSP] * gblcblk[PQESSM]); +static double PQ_energy(double omega, std::size_t n_part) { + constexpr std::size_t size = 8; + + double energy = 0; + for (std::size_t ic = 0; ic < n_part; ic++) { + energy += partblk[size * ic + PQECCM] * gblcblk[PQECCP] + + partblk[size * ic + PQECSM] * gblcblk[PQECSP] + + partblk[size * ic + PQESCM] * gblcblk[PQESCP] + + partblk[size * ic + PQESSM] * gblcblk[PQESSP] + + partblk[size * ic + PQECCP] * gblcblk[PQECCM] + + partblk[size * 
ic + PQECSP] * gblcblk[PQECSM] + + partblk[size * ic + PQESCP] * gblcblk[PQESCM] + + partblk[size * ic + PQESSP] * gblcblk[PQESSM]; } - return eng; + return energy / omega; } +/**@}*/ /*****************************************************************/ /* main loops */ @@ -1002,8 +864,8 @@ static double PQ_energy(double omega, int n_part) { void ELC_add_force(const ParticleRange &particles) { constexpr double c_2pi = 2 * Utils::pi(); - auto const n_scxcache = int(ceil(elc_params.far_cut / ux) + 1); - auto const n_scycache = int(ceil(elc_params.far_cut / uy) + 1); + auto const n_scxcache = std::size_t(ceil(elc_params.far_cut / ux) + 1); + auto const n_scycache = std::size_t(ceil(elc_params.far_cut / uy) + 1); prepare_sc_cache(particles, n_scxcache, ux, n_scycache, uy); partblk.resize(particles.size() * 8); @@ -1012,26 +874,35 @@ void ELC_add_force(const ParticleRange &particles) { add_z_force(particles); /* the second condition is just for the case of numerical accident */ - for (int p = 1; ux * (p - 1) < elc_params.far_cut && p <= n_scxcache; p++) { - auto const omega = c_2pi * ux * p; - setup_P(p, omega, particles); + for (std::size_t p = 1; + ux * static_cast(p - 1) < elc_params.far_cut && p <= n_scxcache; + p++) { + auto const omega = c_2pi * ux * static_cast(p); + setup_PoQ(p, omega, particles); distribute(4); - add_P_force(particles); + add_PoQ_force(particles); } - for (int q = 1; uy * (q - 1) < elc_params.far_cut && q <= n_scycache; q++) { - auto const omega = c_2pi * uy * q; - setup_Q(q, omega, particles); + for (std::size_t q = 1; + uy * static_cast(q - 1) < elc_params.far_cut && q <= n_scycache; + q++) { + auto const omega = c_2pi * uy * static_cast(q); + setup_PoQ(q, omega, particles); distribute(4); - add_Q_force(particles); + add_PoQ_force(particles); } - for (int p = 1; ux * (p - 1) < elc_params.far_cut && p <= n_scxcache; p++) { - for (int q = 1; Utils::sqr(ux * (p - 1)) + Utils::sqr(uy * (q - 1)) < - elc_params.far_cut2 && - q <= n_scycache; + for 
(std::size_t p = 1; + ux * static_cast(p - 1) < elc_params.far_cut && p <= n_scxcache; + p++) { + for (std::size_t q = 1; + Utils::sqr(ux * static_cast(p - 1)) + + Utils::sqr(uy * static_cast(q - 1)) < + elc_params.far_cut2 && + q <= n_scycache; q++) { - auto const omega = c_2pi * sqrt(Utils::sqr(ux * p) + Utils::sqr(uy * q)); + auto const omega = c_2pi * sqrt(Utils::sqr(ux * static_cast(p)) + + Utils::sqr(uy * static_cast(q))); setup_PQ(p, q, omega, particles); distribute(8); add_PQ_force(p, q, omega, particles); @@ -1041,42 +912,53 @@ void ELC_add_force(const ParticleRange &particles) { double ELC_energy(const ParticleRange &particles) { constexpr double c_2pi = 2 * Utils::pi(); - auto eng = dipole_energy(particles); - eng += z_energy(particles); + auto energy = dipole_energy(particles); + energy += z_energy(particles); - auto const n_scxcache = int(ceil(elc_params.far_cut / ux) + 1); - auto const n_scycache = int(ceil(elc_params.far_cut / uy) + 1); + auto const n_scxcache = std::size_t(ceil(elc_params.far_cut / ux) + 1); + auto const n_scycache = std::size_t(ceil(elc_params.far_cut / uy) + 1); prepare_sc_cache(particles, n_scxcache, ux, n_scycache, uy); auto const n_localpart = particles.size(); partblk.resize(n_localpart * 8); /* the second condition is just for the case of numerical accident */ - for (int p = 1; ux * (p - 1) < elc_params.far_cut && p <= n_scxcache; p++) { - auto const omega = c_2pi * ux * p; - setup_P(p, omega, particles); + for (std::size_t p = 1; + ux * static_cast(p - 1) < elc_params.far_cut && p <= n_scxcache; + p++) { + auto const omega = c_2pi * ux * static_cast(p); + setup_PoQ(p, omega, particles); distribute(4); - eng += P_energy(omega, n_localpart); + energy += PoQ_energy(omega, n_localpart); } - for (int q = 1; uy * (q - 1) < elc_params.far_cut && q <= n_scycache; q++) { - auto const omega = c_2pi * uy * q; - setup_Q(q, omega, particles); + + for (std::size_t q = 1; + uy * static_cast(q - 1) < elc_params.far_cut && q <= n_scycache; + 
q++) { + auto const omega = c_2pi * uy * static_cast(q); + setup_PoQ(q, omega, particles); distribute(4); - eng += Q_energy(omega, n_localpart); + energy += PoQ_energy(omega, n_localpart); } - for (int p = 1; ux * (p - 1) < elc_params.far_cut && p <= n_scxcache; p++) { - for (int q = 1; Utils::sqr(ux * (p - 1)) + Utils::sqr(uy * (q - 1)) < - elc_params.far_cut2 && - q <= n_scycache; + + for (std::size_t p = 1; + ux * static_cast(p - 1) < elc_params.far_cut && p <= n_scxcache; + p++) { + for (std::size_t q = 1; + Utils::sqr(ux * static_cast(p - 1)) + + Utils::sqr(uy * static_cast(q - 1)) < + elc_params.far_cut2 && + q <= n_scycache; q++) { - auto const omega = c_2pi * sqrt(Utils::sqr(ux * p) + Utils::sqr(uy * q)); + auto const omega = c_2pi * sqrt(Utils::sqr(ux * static_cast(p)) + + Utils::sqr(uy * static_cast(q))); setup_PQ(p, q, omega, particles); distribute(8); - eng += PQ_energy(omega, n_localpart); + energy += PQ_energy(omega, n_localpart); } } /* we count both i<->j and j<->i, so return just half of it */ - return 0.5 * eng; + return 0.5 * energy; } int ELC_tune(double error) { @@ -1089,8 +971,10 @@ int ELC_tune(double error) { lz = elc_params.h + elc_params.space_layer; } - if (h < 0) + if (h < 0) { + runtimeErrorMsg() << "gap size too large"; return ES_ERROR; + } elc_params.far_cut = min_inv_boxl; @@ -1109,8 +993,10 @@ int ELC_tune(double error) { elc_params.far_cut += min_inv_boxl; } while (err > error && elc_params.far_cut < MAXIMAL_FAR_CUT); - if (elc_params.far_cut >= MAXIMAL_FAR_CUT) + if (elc_params.far_cut >= MAXIMAL_FAR_CUT) { + runtimeErrorMsg() << "maxPWerror too small"; return ES_ERROR; + } elc_params.far_cut -= min_inv_boxl; elc_params.far_cut2 = Utils::sqr(elc_params.far_cut); @@ -1192,18 +1078,9 @@ void ELC_init() { if (elc_params.far_calculated && (elc_params.dielectric_contrast_on)) { if (ELC_tune(elc_params.maxPWerror) == ES_ERROR) { - runtimeErrorMsg() << "ELC auto-retuning failed, gap size too small"; + runtimeErrorMsg() << "ELC 
auto-retuning failed"; } } - if (elc_params.dielectric_contrast_on) { - p3m.params.additional_mesh[0] = 0; - p3m.params.additional_mesh[1] = 0; - p3m.params.additional_mesh[2] = elc_params.space_layer; - } else { - p3m.params.additional_mesh[0] = 0; - p3m.params.additional_mesh[1] = 0; - p3m.params.additional_mesh[2] = 0; - } } int ELC_set_params(double maxPWerror, double gap_size, double far_cut, @@ -1248,7 +1125,7 @@ int ELC_set_params(double maxPWerror, double gap_size, double far_cut, ELC_setup_constants(); - Coulomb::elc_sanity_check(); + int error_code = Coulomb::elc_sanity_check(); p3m.params.epsilon = P3M_EPSILON_METALLIC; coulomb.method = COULOMB_ELC_P3M; @@ -1260,12 +1137,12 @@ int ELC_set_params(double maxPWerror, double gap_size, double far_cut, } else { elc_params.far_calculated = true; if (ELC_tune(elc_params.maxPWerror) == ES_ERROR) { - runtimeErrorMsg() << "ELC tuning failed, gap size too small"; + error_code = ES_ERROR; } } mpi_bcast_coulomb_params(); - return ES_OK; + return error_code; } //////////////////////////////////////////////////////////////////////////////////// diff --git a/src/core/electrostatics_magnetostatics/icc.cpp b/src/core/electrostatics_magnetostatics/icc.cpp index 149f5ffc84f..64324a2d0e4 100644 --- a/src/core/electrostatics_magnetostatics/icc.cpp +++ b/src/core/electrostatics_magnetostatics/icc.cpp @@ -50,10 +50,10 @@ #include #include -iccp3m_struct iccp3m_cfg; +icc_struct icc_cfg; -void init_forces_iccp3m(const ParticleRange &particles, - const ParticleRange &ghosts_particles); +void init_forces_icc(const ParticleRange &particles, + const ParticleRange &ghosts_particles); /** Calculate the electrostatic forces between source charges (= real charges) * and wall charges. For each electrostatic method, the proper functions @@ -61,15 +61,15 @@ void init_forces_iccp3m(const ParticleRange &particles, * directly, short-range parts need helper functions according to the particle * data organisation. 
This is a modified version of \ref force_calc. */ -void force_calc_iccp3m(const ParticleRange &particles, - const ParticleRange &ghost_particles); +void force_calc_icc(const ParticleRange &particles, + const ParticleRange &ghost_particles); /** Variant of @ref add_non_bonded_pair_force where only %Coulomb * contributions are calculated */ -inline void add_non_bonded_pair_force_iccp3m(Particle &p1, Particle &p2, - Utils::Vector3d const &d, - double dist, double dist2) { +inline void add_non_bonded_pair_force_icc(Particle &p1, Particle &p2, + Utils::Vector3d const &d, double dist, + double dist2) { auto forces = Coulomb::pair_force(p1, p2, d, dist); p1.f.f += std::get<0>(forces); @@ -79,90 +79,74 @@ inline void add_non_bonded_pair_force_iccp3m(Particle &p1, Particle &p2, p2.f.f += std::get<2>(forces); #endif } +void icc_iteration(const ParticleRange &particles, + const ParticleRange &ghost_particles) { + if (icc_cfg.n_icc == 0) + return; -void iccp3m_alloc_lists() { - auto const n_ic = iccp3m_cfg.n_ic; - - iccp3m_cfg.areas.resize(n_ic); - iccp3m_cfg.ein.resize(n_ic); - iccp3m_cfg.normals.resize(n_ic); - iccp3m_cfg.sigma.resize(n_ic); -} - -int iccp3m_iteration(const ParticleRange &particles, - const ParticleRange &ghost_particles) { - if (iccp3m_cfg.n_ic == 0) - return 0; - - Coulomb::iccp3m_sanity_check(); - - if (iccp3m_cfg.eout <= 0) { - runtimeErrorMsg() - << "ICCP3M: nonpositive dielectric constant is not allowed."; - } + Coulomb::icc_sanity_check(); auto const pref = 1.0 / (coulomb.prefactor * 2 * Utils::pi()); - iccp3m_cfg.citeration = 0; + icc_cfg.citeration = 0; - double globalmax = 1e100; + double globalmax = 0.; - for (int j = 0; j < iccp3m_cfg.num_iteration; j++) { - double hmax = 0.; + for (int j = 0; j < icc_cfg.num_iteration; j++) { + double charge_density_max = 0.; - force_calc_iccp3m(particles, ghost_particles); /* Calculate electrostatic + force_calc_icc(particles, ghost_particles); /* Calculate electrostatic forces (SR+LR) excluding source source 
interaction*/ cell_structure.ghosts_reduce_forces(); double diff = 0; for (auto &p : particles) { - if (p.p.identity < iccp3m_cfg.n_ic + iccp3m_cfg.first_id && - p.p.identity >= iccp3m_cfg.first_id) { - auto const id = p.p.identity - iccp3m_cfg.first_id; + if (p.p.identity < icc_cfg.n_icc + icc_cfg.first_id && + p.p.identity >= icc_cfg.first_id) { + auto const id = p.p.identity - icc_cfg.first_id; /* the dielectric-related prefactor: */ - auto const del_eps = (iccp3m_cfg.ein[id] - iccp3m_cfg.eout) / - (iccp3m_cfg.ein[id] + iccp3m_cfg.eout); + auto const del_eps = + (icc_cfg.ein[id] - icc_cfg.eout) / (icc_cfg.ein[id] + icc_cfg.eout); /* calculate the electric field at the certain position */ - auto const E = p.f.f / p.p.q + iccp3m_cfg.ext_field; + auto const local_e_field = p.f.f / p.p.q + icc_cfg.ext_field; - if (E[0] == 0 && E[1] == 0 && E[2] == 0) { + if (local_e_field.norm2() == 0) { runtimeErrorMsg() - << "ICCP3M found zero electric field on a charge. This must " + << "ICC found zero electric field on a charge. This must " "never happen"; } - /* recalculate the old charge density */ - auto const hold = p.p.q / iccp3m_cfg.areas[id]; - /* determine if it is higher than the previously highest charge - * density */ - hmax = std::max(hmax, std::abs(hold)); - - auto const f1 = del_eps * pref * (E * iccp3m_cfg.normals[id]); - auto const f2 = (not iccp3m_cfg.sigma.empty()) - ? (2 * iccp3m_cfg.eout) / - (iccp3m_cfg.eout + iccp3m_cfg.ein[id]) * - (iccp3m_cfg.sigma[id]) - : 0.; + auto const charge_density_old = p.p.q / icc_cfg.areas[id]; + + charge_density_max = + std::max(charge_density_max, std::abs(charge_density_old)); + + auto const charge_density_update = + del_eps * pref * (local_e_field * icc_cfg.normals[id]) + + 2 * icc_cfg.eout / (icc_cfg.eout + icc_cfg.ein[id]) * + icc_cfg.sigma[id]; /* relative variation: never use an estimator which can be negative * here */ - auto const hnew = - (1. 
- iccp3m_cfg.relax) * hold + (iccp3m_cfg.relax) * (f1 + f2); + auto const charge_density_new = + (1. - icc_cfg.relax) * charge_density_old + + (icc_cfg.relax) * charge_density_update; /* Take the largest error to check for convergence */ auto const relative_difference = - std::abs(1 * (hnew - hold) / (hmax + std::abs(hnew + hold))); + std::abs((charge_density_new - charge_density_old) / + (charge_density_max + + std::abs(charge_density_new + charge_density_old))); diff = std::max(diff, relative_difference); - p.p.q = hnew * iccp3m_cfg.areas[id]; + p.p.q = charge_density_new * icc_cfg.areas[id]; /* check if the charge now is more than 1e6, to determine if ICC still - * leads to reasonable results */ - /* this is kind of an arbitrary measure but does a good job spotting - * divergence! */ + * leads to reasonable results. This is kind of an arbitrary measure + * but does a good job spotting divergence! */ if (std::abs(p.p.q) > 1e6) { runtimeErrorMsg() - << "too big charge assignment in iccp3m! q >1e6 , assigned " + << "too big charge assignment in icc! q >1e6 , assigned " "charge= " << p.p.q; @@ -174,62 +158,118 @@ int iccp3m_iteration(const ParticleRange &particles, /* Update charges on ghosts. 
*/ cell_structure.ghosts_update(Cells::DATA_PART_PROPERTIES); - iccp3m_cfg.citeration++; + icc_cfg.citeration++; - MPI_Allreduce(&diff, &globalmax, 1, MPI_DOUBLE, MPI_MAX, comm_cart); + boost::mpi::all_reduce(comm_cart, diff, globalmax, + boost::mpi::maximum()); - if (globalmax < iccp3m_cfg.convergence) + if (globalmax < icc_cfg.convergence) break; } /* iteration */ - if (globalmax > iccp3m_cfg.convergence) { + if (globalmax > icc_cfg.convergence) { runtimeErrorMsg() << "ICC failed to converge in the given number of maximal steps."; } on_particle_charge_change(); - - return iccp3m_cfg.citeration; } -void force_calc_iccp3m(const ParticleRange &particles, - const ParticleRange &ghost_particles) { - init_forces_iccp3m(particles, ghost_particles); +void force_calc_icc(const ParticleRange &particles, + const ParticleRange &ghost_particles) { + init_forces_icc(particles, ghost_particles); - cell_structure.non_bonded_loop([](Particle &p1, Particle &p2, - Distance const &d) { - /* calc non-bonded interactions */ - add_non_bonded_pair_force_iccp3m(p1, p2, d.vec21, sqrt(d.dist2), d.dist2); - }); + cell_structure.non_bonded_loop( + [](Particle &p1, Particle &p2, Distance const &d) { + /* calc non-bonded interactions */ + add_non_bonded_pair_force_icc(p1, p2, d.vec21, sqrt(d.dist2), d.dist2); + }); Coulomb::calc_long_range_force(particles); } -void init_forces_iccp3m(const ParticleRange &particles, - const ParticleRange &ghosts_particles) { +void init_forces_icc(const ParticleRange &particles, + const ParticleRange &ghosts_particles) { for (auto &p : particles) { - p.f = ParticleForce{}; + p.f.f = {}; } for (auto &p : ghosts_particles) { - p.f = ParticleForce{}; + p.f.f = {}; } } -void mpi_iccp3m_init_local(const iccp3m_struct &iccp3m_cfg_) { - iccp3m_cfg = iccp3m_cfg_; +void mpi_icc_init_local(const icc_struct &icc_cfg_) { + icc_cfg = icc_cfg_; on_particle_charge_change(); check_runtime_errors(comm_cart); } -REGISTER_CALLBACK(mpi_iccp3m_init_local) 
+REGISTER_CALLBACK(mpi_icc_init_local) -int mpi_iccp3m_init() { - mpi_call(mpi_iccp3m_init_local, iccp3m_cfg); +int mpi_icc_init() { + mpi_call(mpi_icc_init_local, icc_cfg); on_particle_charge_change(); return check_runtime_errors(comm_cart); } +void icc_set_params(int n_icc, double convergence, double relaxation, + Utils::Vector3d &ext_field, int max_iterations, + int first_id, double eps_out, std::vector &areas, + std::vector &e_in, std::vector &sigma, + std::vector &normals) { + if (n_icc < 0) + throw std::runtime_error("ICC: invalid number of particles. " + + std::to_string(n_icc)); + if (convergence <= 0) + throw std::runtime_error("ICC: invalid convergence value. " + + std::to_string(convergence)); + if (relaxation < 0 or relaxation > 2) + throw std::runtime_error("ICC: invalid relaxation value. " + + std::to_string(relaxation)); + if (max_iterations <= 0) + throw std::runtime_error("ICC: invalid max_iterations. " + + std::to_string(max_iterations)); + if (first_id < 0) + throw std::runtime_error("ICC: invalid first_id. " + + std::to_string(first_id)); + if (eps_out <= 0) + throw std::runtime_error("ICC: invalid eps_out. 
" + + std::to_string(eps_out)); + if (areas.size() != n_icc) + throw std::runtime_error("ICC: invalid areas vector."); + if (e_in.size() != n_icc) + throw std::runtime_error("ICC: invalid e_in vector."); + if (sigma.size() != n_icc) + throw std::runtime_error("ICC: invalid sigma vector."); + if (normals.size() != n_icc) + throw std::runtime_error("ICC: invalid normals vector."); + + icc_cfg.n_icc = n_icc; + icc_cfg.convergence = convergence; + icc_cfg.relax = relaxation; + icc_cfg.ext_field = ext_field; + icc_cfg.num_iteration = max_iterations; + icc_cfg.first_id = first_id; + icc_cfg.eout = eps_out; + + icc_cfg.areas = std::move(areas); + icc_cfg.ein = std::move(e_in); + icc_cfg.sigma = std::move(sigma); + icc_cfg.normals = std::move(normals); + + mpi_icc_init(); +} + +void icc_deactivate() { + icc_cfg.n_icc = 0; + icc_cfg.areas.resize(0); + icc_cfg.ein.resize(0); + icc_cfg.normals.resize(0); + icc_cfg.sigma.resize(0); + + mpi_icc_init(); +} #endif diff --git a/src/core/electrostatics_magnetostatics/icc.hpp b/src/core/electrostatics_magnetostatics/icc.hpp index 12f31942e6c..317fd66ee60 100644 --- a/src/core/electrostatics_magnetostatics/icc.hpp +++ b/src/core/electrostatics_magnetostatics/icc.hpp @@ -20,22 +20,18 @@ */ /** \file * - * ICCP3M is a method that allows to take into account the influence + * ICC is a method that allows to take into account the influence * of arbitrarily shaped dielectric interfaces. The dielectric * properties of a dielectric medium in the bulk of the simulation * box are taken into account by reproducing the jump in the electric * field at the interface with charge surface segments. The charge * density of the surface segments have to be determined - * self-consistently using an iterative scheme. It can at present - - * despite its name - be used with P3M, ELCP3M and MMM1D. For - * details see: @cite tyagi10a + * self-consistently using an iterative scheme. It can at present + * be used with P3M, ELCP3M and MMM1D. 
For details see: @cite tyagi10a * - * To set up ICCP3M, first the dielectric boundary has to be modeled - * by ESPResSo particles 0..n where n has to be passed as a parameter - * to ICCP3M. This is still a bit inconvenient, as it forces the user - * to reserve the first n particle ids to wall charges, but as the - * other parts of ESPResSo do not suffer from a limitation like this, - * it can be tolerated. + * To set up ICC, first the dielectric boundary has to be modeled + * by ESPResSo particles n_0...n_0+n where n_0 and n have to be passed + * as a parameter to ICC. * * For the determination of the induced charges only the forces * acting on the induced charges has to be determined. As P3M and the @@ -45,8 +41,8 @@ * particle data organisation schemes this is performed differently. */ -#ifndef CORE_ICCP3M_HPP -#define CORE_ICCP3M_HPP +#ifndef CORE_ICC_HPP +#define CORE_ICC_HPP #include "config.hpp" @@ -58,25 +54,36 @@ #include #include -/** ICCP3M data structure */ -struct iccp3m_struct { - int n_ic; /**< Last induced id (cannot be smaller than 2) */ - int num_iteration = 30; /**< Number of max iterations */ - double eout = 1; /**< Dielectric constant of the bulk */ - std::vector areas; /**< Array of area of the grid elements */ - std::vector ein; /**< Array of dielectric constants at each surface - element */ - std::vector sigma; /**< Surface charge density */ - double convergence = 1e-2; /**< Convergence criterion */ - std::vector normals; /**< Surface normal vectors */ - Utils::Vector3d ext_field = {0, 0, 0}; /**< External field */ - double relax = 0.7; /**< relaxation parameter for iteration */ - int citeration = 0; /**< current number of iterations */ - int first_id = 0; /**< id of the first particle in the dielectric boundary */ +/** ICC data structure */ +struct icc_struct { + /** First id of ICC particle */ + int n_icc; + /** maximum number of iterations */ + int num_iteration = 30; + /** bulk dielectric constant */ + double eout; + /** areas of the 
particles */ + std::vector areas; + /** dielectric constants of the particles */ + std::vector ein; + /** surface charge density of the particles */ + std::vector sigma; + /** convergence criteria */ + double convergence = 1e-2; + /** surface normal vectors */ + std::vector normals; + /** external electric field */ + Utils::Vector3d ext_field = {0, 0, 0}; + /** relaxation parameter */ + double relax; + /** last number of iterations */ + int citeration = 0; + /** first ICC particle id */ + int first_id = 0; template void serialize(Archive &ar, long int /* version */) { - ar &n_ic; + ar &n_icc; ar &num_iteration; ar &first_id; ar &convergence; @@ -90,27 +97,32 @@ struct iccp3m_struct { ar &citeration; } }; -extern iccp3m_struct iccp3m_cfg; /**< Global state of the ICCP3M solver */ + +/** ICC parameters */ +extern icc_struct icc_cfg; /** The main iterative scheme, where the surface element charges are calculated * self-consistently. */ -int iccp3m_iteration(const ParticleRange &particles, - const ParticleRange &ghost_particles); +void icc_iteration(const ParticleRange &particles, + const ParticleRange &ghost_particles); -/** The allocation of ICCP3M lists for python interface +/** Perform ICC initialization. + * @return non-zero value on error */ -void iccp3m_alloc_lists(); +int mpi_icc_init(); -/** check sanity of parameters for use with ICCP3M +/** Set ICC parameters */ -int iccp3m_sanity_check(); +void icc_set_params(int n_ic, double convergence, double relaxation, + Utils::Vector3d &ext_field, int max_iterations, + int first_id, double eps_out, std::vector &areas, + std::vector &e_in, std::vector &sigma, + std::vector &normals); -/** Perform iccp3m initialization. 
- * @return non-zero value on error +/** clear ICC vector allocations */ -int mpi_iccp3m_init(); +void icc_deactivate(); #endif /* ELECTROSTATICS */ - -#endif /* ICCP3M_H */ +#endif /* CORE_ICC_HPP */ diff --git a/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.cpp b/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.cpp index 957022c3c69..b59c1e79e21 100644 --- a/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.cpp +++ b/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.cpp @@ -34,9 +34,17 @@ #include "grid.hpp" #include - -double calc_dipole_dipole_ia(Particle &p1, Utils::Vector3d const &dip1, - Particle &p2, bool force_flag) { +#include + +/** + * Calculate dipolar energy and optionally force between two particles. + * @param[in,out] p1 First particle + * @param[in] dip1 Cached dipole moment of the first particle + * @param[in,out] p2 Second particle + * @param[in] force_flag If true, update the particle forces and torques + */ +static double calc_dipole_dipole_ia(Particle &p1, Utils::Vector3d const &dip1, + Particle &p2, bool force_flag) { // Cache dipole moment auto const dip2 = p2.calc_dip(); @@ -58,7 +66,7 @@ double calc_dipole_dipole_ia(Particle &p1, Utils::Vector3d const &dip1, auto const pe4 = 3.0 / r5; // Energy - auto const u = dipole.prefactor * (pe1 / r3 - pe4 * pe2 * pe3); + auto const energy = dipole.prefactor * (pe1 / r3 - pe4 * pe2 * pe3); // Forces, if requested if (force_flag) { @@ -81,8 +89,7 @@ double calc_dipole_dipole_ia(Particle &p1, Utils::Vector3d const &dip1, p2.f.torque += dipole.prefactor * (aa / r3 + b2 * dd); } - // Return energy - return u; + return energy; } /* ============================================================================= @@ -93,23 +100,12 @@ double calc_dipole_dipole_ia(Particle &p1, Utils::Vector3d const &dip1, double dawaanr_calculations(bool force_flag, bool energy_flag, const ParticleRange &particles) { - if (n_nodes != 1) { - fprintf(stderr, "error: 
DAWAANR is just for one cpu...\n"); - errexit(); - } - if (!(force_flag) && !(energy_flag)) { - fprintf(stderr, "I don't know why you call dawaanr_calculations() " - "with all flags zero.\n"); - return 0; - } - - // Variable to sum up the energy - double u = 0; - - auto parts = particles; + assert(n_nodes == 1); + assert(force_flag || energy_flag); - // Iterate over all cells - for (auto it = parts.begin(), end = parts.end(); it != end; ++it) { + double energy = 0.0; + // Iterate over all particles + for (auto it = particles.begin(), end = particles.end(); it != end; ++it) { // If the particle has no dipole moment, ignore it if (it->p.dipm == 0.0) continue; @@ -123,12 +119,11 @@ double dawaanr_calculations(bool force_flag, bool energy_flag, if (jt->p.dipm == 0.0) continue; // Calculate energy and/or force between the particles - u += calc_dipole_dipole_ia(*it, dip1, *jt, force_flag); + energy += calc_dipole_dipole_ia(*it, dip1, *jt, force_flag); } } - // Return energy - return u; + return energy; } /* ============================================================================= @@ -148,6 +143,9 @@ double magnetic_dipolar_direct_sum_calculations(bool force_flag, bool energy_flag, ParticleRange const &particles) { + assert(n_nodes == 1); + assert(force_flag || energy_flag); + if (box_geo.periodic(0) and box_geo.periodic(1) and box_geo.periodic(2) and Ncut_off_magnetic_dipolar_direct_sum == 0) { throw std::runtime_error("Dipolar direct sum with replica does not support " @@ -158,17 +156,6 @@ magnetic_dipolar_direct_sum_calculations(bool force_flag, bool energy_flag, std::vector mx, my, mz; std::vector fx, fy, fz; std::vector tx, ty, tz; - double u; - - if (n_nodes != 1) { - fprintf(stderr, "error: magnetic Direct Sum is just for one cpu...\n"); - errexit(); - } - if (!(force_flag) && !(energy_flag)) { - fprintf(stderr, "I don't know why you call magnetic_dipolar_direct_sum_" - "calculations() with all flags zero\n"); - return 0; - } auto const n_part = 
particles.size(); @@ -217,111 +204,98 @@ magnetic_dipolar_direct_sum_calculations(bool force_flag, bool energy_flag, } } - /*now we do the calculations */ - - { /* beginning of the area of calculation */ - int NCUT[3], NCUT2; - - for (int i = 0; i < 3; i++) { - NCUT[i] = Ncut_off_magnetic_dipolar_direct_sum; - if (box_geo.periodic(i) == 0) { - NCUT[i] = 0; - } - } - NCUT2 = Ncut_off_magnetic_dipolar_direct_sum * - Ncut_off_magnetic_dipolar_direct_sum; - - u = 0; - - for (int i = 0; i < dip_particles; i++) { - for (int j = 0; j < dip_particles; j++) { - auto const pe1 = mx[i] * mx[j] + my[i] * my[j] + mz[i] * mz[j]; - auto const rx = x[i] - x[j]; - auto const ry = y[i] - y[j]; - auto const rz = z[i] - z[j]; - - for (int nx = -NCUT[0]; nx <= NCUT[0]; nx++) { - auto const rnx = rx + nx * box_geo.length()[0]; - auto const rnx2 = rnx * rnx; - for (int ny = -NCUT[1]; ny <= NCUT[1]; ny++) { - auto const rny = ry + ny * box_geo.length()[1]; - auto const rny2 = rny * rny; - for (int nz = -NCUT[2]; nz <= NCUT[2]; nz++) { - if (!(i == j && nx == 0 && ny == 0 && nz == 0)) { - if (nx * nx + ny * ny + nz * nz <= NCUT2) { - auto const rnz = rz + nz * box_geo.length()[2]; - auto const r2 = rnx2 + rny2 + rnz * rnz; - auto const r = sqrt(r2); - auto const r3 = r2 * r; - auto const r5 = r3 * r2; - auto const r7 = r5 * r2; - - auto const pe2 = mx[i] * rnx + my[i] * rny + mz[i] * rnz; - auto const pe3 = mx[j] * rnx + my[j] * rny + mz[j] * rnz; - - // Energy ............................ - - u += pe1 / r3 - 3.0 * pe2 * pe3 / r5; - - if (force_flag) { - double a, b, c, d; - // force ............................ - a = mx[i] * mx[j] + my[i] * my[j] + mz[i] * mz[j]; - a = 3.0 * a / r5; - b = -15.0 * pe2 * pe3 / r7; - c = 3.0 * pe3 / r5; - d = 3.0 * pe2 / r5; - - fx[i] += (a + b) * rnx + c * mx[i] + d * mx[j]; - fy[i] += (a + b) * rny + c * my[i] + d * my[j]; - fz[i] += (a + b) * rnz + c * mz[i] + d * mz[j]; - - // torque ............................ 
- c = 3.0 / r5 * pe3; - auto const ax = my[i] * mz[j] - my[j] * mz[i]; - auto const ay = mx[j] * mz[i] - mx[i] * mz[j]; - auto const az = mx[i] * my[j] - mx[j] * my[i]; - - auto const bx = my[i] * rnz - rny * mz[i]; - auto const by = rnx * mz[i] - mx[i] * rnz; - auto const bz = mx[i] * rny - rnx * my[i]; - - tx[i] += -ax / r3 + bx * c; - ty[i] += -ay / r3 + by * c; - tz[i] += -az / r3 + bz * c; - } /* of force_flag */ - } - } /* of nx*nx+ny*ny +nz*nz< NCUT*NCUT and !(i==j && nx==0 && - ny==0 && nz==0) */ - } /* of for nz */ - } /* of for ny */ - } /* of for nx */ - } - } /* of j and i */ - } /* end of the area of calculation */ + /* energy calculation */ + double energy = 0.; - /* set the forces, and torques of the particles within ESPResSo */ + int NCUT[3]; + for (int i = 0; i < 3; i++) { + NCUT[i] = box_geo.periodic(i) ? Ncut_off_magnetic_dipolar_direct_sum : 0; + } + auto const NCUT2 = Utils::sqr(Ncut_off_magnetic_dipolar_direct_sum); + + for (int i = 0; i < dip_particles; i++) { + for (int j = 0; j < dip_particles; j++) { + auto const pe1 = mx[i] * mx[j] + my[i] * my[j] + mz[i] * mz[j]; + auto const rx = x[i] - x[j]; + auto const ry = y[i] - y[j]; + auto const rz = z[i] - z[j]; + + for (int nx = -NCUT[0]; nx <= NCUT[0]; nx++) { + auto const rnx = rx + nx * box_geo.length()[0]; + auto const rnx2 = rnx * rnx; + for (int ny = -NCUT[1]; ny <= NCUT[1]; ny++) { + auto const rny = ry + ny * box_geo.length()[1]; + auto const rny2 = rny * rny; + for (int nz = -NCUT[2]; nz <= NCUT[2]; nz++) { + if (!(i == j && nx == 0 && ny == 0 && nz == 0) and + (nx * nx + ny * ny + nz * nz <= NCUT2)) { + auto const rnz = rz + nz * box_geo.length()[2]; + auto const r2 = rnx2 + rny2 + rnz * rnz; + auto const r = sqrt(r2); + auto const r3 = r2 * r; + auto const r5 = r3 * r2; + auto const r7 = r5 * r2; + + auto const pe2 = mx[i] * rnx + my[i] * rny + mz[i] * rnz; + auto const pe3 = mx[j] * rnx + my[j] * rny + mz[j] * rnz; + auto const pe4 = 3.0 / r5; + + // Energy + energy += pe1 / r3 - 
pe4 * pe2 * pe3; + + if (force_flag) { + // Forces + auto const a = pe4 * pe1; + auto const b = -15.0 * pe2 * pe3 / r7; + auto const c = pe4 * pe3; + auto const d = pe4 * pe2; + + fx[i] += (a + b) * rnx + c * mx[i] + d * mx[j]; + fy[i] += (a + b) * rny + c * my[i] + d * my[j]; + fz[i] += (a + b) * rnz + c * mz[i] + d * mz[j]; + + // Torques + auto const ax = my[i] * mz[j] - my[j] * mz[i]; + auto const ay = mx[j] * mz[i] - mx[i] * mz[j]; + auto const az = mx[i] * my[j] - mx[j] * my[i]; + + auto const bx = my[i] * rnz - rny * mz[i]; + auto const by = rnx * mz[i] - mx[i] * rnz; + auto const bz = mx[i] * rny - rnx * my[i]; + + tx[i] += -ax / r3 + bx * c; + ty[i] += -ay / r3 + by * c; + tz[i] += -az / r3 + bz * c; + } /* if force_flag */ + } /* if distance criterion */ + } /* for nz */ + } /* for ny */ + } /* for nx */ + } /* for j */ + } /* for i */ + + /* update particle forces and torques */ if (force_flag) { - int dip_particles2 = 0; + dip_particles = 0; for (auto &p : particles) { if (p.p.dipm != 0.0) { - p.f.f[0] += dipole.prefactor * fx[dip_particles2]; - p.f.f[1] += dipole.prefactor * fy[dip_particles2]; - p.f.f[2] += dipole.prefactor * fz[dip_particles2]; + p.f.f[0] += dipole.prefactor * fx[dip_particles]; + p.f.f[1] += dipole.prefactor * fy[dip_particles]; + p.f.f[2] += dipole.prefactor * fz[dip_particles]; - p.f.torque[0] += dipole.prefactor * tx[dip_particles2]; - p.f.torque[1] += dipole.prefactor * ty[dip_particles2]; - p.f.torque[2] += dipole.prefactor * tz[dip_particles2]; + p.f.torque[0] += dipole.prefactor * tx[dip_particles]; + p.f.torque[1] += dipole.prefactor * ty[dip_particles]; + p.f.torque[2] += dipole.prefactor * tz[dip_particles]; - dip_particles2++; + dip_particles++; } } - } /*of if force_flag */ + } /* if force_flag */ - return 0.5 * dipole.prefactor * u; + return 0.5 * dipole.prefactor * energy; } int dawaanr_set_params() { diff --git a/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.hpp 
b/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.hpp index 4eaacaf7435..f594b5c72ca 100644 --- a/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.hpp +++ b/src/core/electrostatics_magnetostatics/magnetic_non_p3m_methods.hpp @@ -42,9 +42,6 @@ #include "Particle.hpp" #include "ParticleRange.hpp" -/** Calculate dipolar energy and/or force between two particles */ -double calc_dipole_dipole_ia(Particle &p1, Particle &p2, bool force_flag); - /* ============================================================================= DAWAANR => DIPOLAR ALL WITH ALL AND NO REPLICA ============================================================================= diff --git a/src/core/electrostatics_magnetostatics/p3m-common.cpp b/src/core/electrostatics_magnetostatics/p3m-common.cpp index 0bf10fc1a61..abbfffb0e58 100644 --- a/src/core/electrostatics_magnetostatics/p3m-common.cpp +++ b/src/core/electrostatics_magnetostatics/p3m-common.cpp @@ -115,14 +115,17 @@ double p3m_analytic_cotangent_sum(int n, double mesh_i, int cao) { void p3m_calc_local_ca_mesh(p3m_local_mesh &local_mesh, const P3MParameters ¶ms, - const LocalBox &local_geo, double skin) { + const LocalBox &local_geo, double skin, + double space_layer) { int i; int ind[3]; /* total skin size */ double full_skin[3]; for (i = 0; i < 3; i++) - full_skin[i] = params.cao_cut[i] + skin + params.additional_mesh[i]; + full_skin[i] = params.cao_cut[i] + skin; + + full_skin[2] += space_layer; /* inner left down grid point (global index) */ for (i = 0; i < 3; i++) diff --git a/src/core/electrostatics_magnetostatics/p3m-common.hpp b/src/core/electrostatics_magnetostatics/p3m-common.hpp index 7e002994c95..26e9fbaf3e5 100644 --- a/src/core/electrostatics_magnetostatics/p3m-common.hpp +++ b/src/core/electrostatics_magnetostatics/p3m-common.hpp @@ -147,14 +147,11 @@ typedef struct { /** number of points unto which a single charge is interpolated, i.e. 
* p3m.cao^3 */ int cao3 = 0; - /** additional points around the charge assignment mesh, for method like - * dielectric ELC creating virtual charges. */ - double additional_mesh[3] = {}; template void serialize(Archive &ar, long int) { ar &tuning &alpha_L &r_cut_iL &mesh; ar &mesh_off &cao &accuracy &epsilon &cao_cut; - ar &a &ai &alpha &r_cut &cao3 &additional_mesh; + ar &a &ai &alpha &r_cut &cao3; } } P3MParameters; @@ -186,7 +183,8 @@ double p3m_analytic_cotangent_sum(int n, double mesh_i, int cao); */ void p3m_calc_local_ca_mesh(p3m_local_mesh &local_mesh, const P3MParameters ¶ms, - const LocalBox &local_geo, double skin); + const LocalBox &local_geo, double skin, + double space_layer); /** Calculate the spatial position of the left down mesh * point of the local mesh, to be stored in diff --git a/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp b/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp index 02bbce6992a..d2fe0cef479 100644 --- a/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp +++ b/src/core/electrostatics_magnetostatics/p3m-dipolar.cpp @@ -70,6 +70,7 @@ #include #include #include +#include #include /************************************************ @@ -199,7 +200,7 @@ void dp3m_init() { * and the cutoff for charge assignment dp3m.params.cao_cut */ dp3m_init_a_ai_cao_cut(); - p3m_calc_local_ca_mesh(dp3m.local_mesh, dp3m.params, local_geo, skin); + p3m_calc_local_ca_mesh(dp3m.local_mesh, dp3m.params, local_geo, skin, 0.0); dp3m.sm.resize(comm_cart, dp3m.local_mesh); @@ -225,8 +226,7 @@ void dp3m_init() { * functions related to the parsing & tuning of the dipolar parameters ******************/ -void dp3m_set_tune_params(double r_cut, int mesh, int cao, double alpha, - double accuracy) { +void dp3m_set_tune_params(double r_cut, int mesh, int cao, double accuracy) { if (r_cut >= 0) { dp3m.params.r_cut = r_cut; dp3m.params.r_cut_iL = r_cut / box_geo.length()[0]; @@ -238,75 +238,68 @@ void dp3m_set_tune_params(double r_cut, int mesh, int 
cao, double alpha, if (cao >= 0) dp3m.params.cao = cao; - if (alpha >= 0) { - dp3m.params.alpha = alpha; - dp3m.params.alpha_L = alpha * box_geo.length()[0]; - } - if (accuracy >= 0) dp3m.params.accuracy = accuracy; } /*****************************************************************************/ -int dp3m_set_params(double r_cut, int mesh, int cao, double alpha, - double accuracy) { - if (dipole.method != DIPOLAR_P3M && dipole.method != DIPOLAR_MDLC_P3M) - Dipole::set_method_local(DIPOLAR_P3M); - +void dp3m_set_params(double r_cut, int mesh, int cao, double alpha, + double accuracy) { if (r_cut < 0) - return -1; + throw std::runtime_error("DipolarP3M: invalid r_cut"); if (mesh < 0) - return -2; + throw std::runtime_error("DipolarP3M: invalid mesh size"); + + if (cao < 1 || cao > 7) + throw std::runtime_error("DipolarP3M: invalid cao"); - if (cao < 1 || cao > 7 || cao > mesh) - return -3; + if (cao > mesh) + throw std::runtime_error("DipolarP3M: cao larger than mesh size"); + + if (alpha <= 0.0 && alpha != -1.0) + throw std::runtime_error("DipolarP3M: invalid alpha"); + + if (accuracy <= 0.0 && accuracy != -1.0) + throw std::runtime_error("DipolarP3M: invalid accuracy"); + + if (dipole.method != DIPOLAR_P3M && dipole.method != DIPOLAR_MDLC_P3M) + Dipole::set_method_local(DIPOLAR_P3M); dp3m.params.r_cut = r_cut; dp3m.params.r_cut_iL = r_cut / box_geo.length()[0]; dp3m.params.mesh[2] = dp3m.params.mesh[1] = dp3m.params.mesh[0] = mesh; dp3m.params.cao = cao; - - if (alpha > 0) { - dp3m.params.alpha = alpha; - dp3m.params.alpha_L = alpha * box_geo.length()[0]; - } else if (alpha != -1.0) - return -4; - - if (accuracy >= 0) - dp3m.params.accuracy = accuracy; - else if (accuracy != -1.0) - return -5; + dp3m.params.alpha = alpha; + dp3m.params.alpha_L = alpha * box_geo.length()[0]; + dp3m.params.accuracy = accuracy; mpi_bcast_coulomb_params(); - - return 0; } -int dp3m_set_mesh_offset(double x, double y, double z) { +void dp3m_set_mesh_offset(double x, double y, double z) 
{ + if (x == -1.0 && y == -1.0 && z == -1.0) + return; + if (x < 0.0 || x > 1.0 || y < 0.0 || y > 1.0 || z < 0.0 || z > 1.0) - return ES_ERROR; + throw std::runtime_error("DipolarP3M: invalid mesh offset"); dp3m.params.mesh_off[0] = x; dp3m.params.mesh_off[1] = y; dp3m.params.mesh_off[2] = z; mpi_bcast_coulomb_params(); - - return ES_OK; } /** We left the handling of the epsilon, due to portability reasons in * the future for the electrical dipoles, or if people want to do * electrical dipoles alone using the magnetic code. Currently unused. */ -int dp3m_set_eps(double eps) { +void dp3m_set_eps(double eps) { dp3m.params.epsilon = eps; mpi_bcast_coulomb_params(); - - return ES_OK; } namespace { diff --git a/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp b/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp index fc262d361ca..660842d10be 100644 --- a/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp +++ b/src/core/electrostatics_magnetostatics/p3m-dipolar.hpp @@ -60,11 +60,11 @@ struct dp3m_data_struct : public p3m_data_struct_base { /** local mesh. */ p3m_local_mesh local_mesh; - /** real space mesh (local) for CA/FFT.*/ + /** real space mesh (local) for CA/FFT. */ fft_vector rs_mesh; - /** real space mesh (local) for CA/FFT of the dipolar field.*/ + /** real space mesh (local) for CA/FFT of the dipolar field. */ std::array, 3> rs_mesh_dip; - /** k-space mesh (local) for k-space calculation and FFT.*/ + /** k-space mesh (local) for k-space calculation and FFT. */ std::vector ks_mesh; /** number of dipolar particles (only on master node). */ @@ -72,7 +72,7 @@ struct dp3m_data_struct : public p3m_data_struct_base { /** Sum of square of magnetic dipoles (only on master node). */ double sum_mu2; - /** position shift for calc. of first assignment mesh point. */ + /** position shift for calculation of first assignment mesh point. 
*/ double pos_shift; p3m_interpolation_cache inter_weights; @@ -80,7 +80,7 @@ struct dp3m_data_struct : public p3m_data_struct_base { /** send/recv mesh sizes */ p3m_send_mesh sm; - /* Stores the value of the energy correction due to MS effects */ + /** value of the energy correction due to MS effects */ double energy_correction; fft_data_struct fft; @@ -90,18 +90,17 @@ struct dp3m_data_struct : public p3m_data_struct_base { extern dp3m_data_struct dp3m; /** @copydoc p3m_set_tune_params */ -void dp3m_set_tune_params(double r_cut, int mesh, int cao, double alpha, - double accuracy); +void dp3m_set_tune_params(double r_cut, int mesh, int cao, double accuracy); /** @copydoc p3m_set_params */ -int dp3m_set_params(double r_cut, int mesh, int cao, double alpha, - double accuracy); +void dp3m_set_params(double r_cut, int mesh, int cao, double alpha, + double accuracy); /** @copydoc p3m_set_mesh_offset */ -int dp3m_set_mesh_offset(double x, double y, double z); +void dp3m_set_mesh_offset(double x, double y, double z); /** @copydoc p3m_set_eps */ -int dp3m_set_eps(double eps); +void dp3m_set_eps(double eps); /** Initialize all structures, parameters and arrays needed for the * P3M algorithm for dipole-dipole interactions. @@ -115,8 +114,6 @@ void dp3m_scaleby_box_l(); bool dp3m_sanity_checks(const Utils::Vector3i &grid); /** Assign the physical dipoles using the tabulated assignment function. - * If Dstore_ca_frac is true, then the charge fractions are buffered in - * Dcur_ca_fmp and Dcur_ca_frac. 
*/ void dp3m_dipole_assign(const ParticleRange &particles); diff --git a/src/core/electrostatics_magnetostatics/p3m.cpp b/src/core/electrostatics_magnetostatics/p3m.cpp index 2a9b2d5bd66..58eb290e686 100644 --- a/src/core/electrostatics_magnetostatics/p3m.cpp +++ b/src/core/electrostatics_magnetostatics/p3m.cpp @@ -66,6 +66,7 @@ #include #include #include +#include using Utils::sinc; @@ -184,7 +185,13 @@ void p3m_init() { return; } - p3m_calc_local_ca_mesh(p3m.local_mesh, p3m.params, local_geo, skin); + double elc_layer = 0.0; + if (coulomb.method == COULOMB_ELC_P3M) { + elc_layer = elc_params.space_layer; + } + + p3m_calc_local_ca_mesh(p3m.local_mesh, p3m.params, local_geo, skin, + elc_layer); p3m.sm.resize(comm_cart, p3m.local_mesh); @@ -205,7 +212,7 @@ void p3m_init() { p3m_count_charged_particles(); } -void p3m_set_tune_params(double r_cut, const int mesh[3], int cao, double alpha, +void p3m_set_tune_params(double r_cut, const int mesh[3], int cao, double accuracy) { if (r_cut >= 0) { p3m.params.r_cut = r_cut; @@ -224,29 +231,33 @@ void p3m_set_tune_params(double r_cut, const int mesh[3], int cao, double alpha, if (cao >= 0) p3m.params.cao = cao; - if (alpha >= 0) { - p3m.params.alpha = alpha; - p3m.params.alpha_L = alpha * box_geo.length()[0]; - } - if (accuracy >= 0) p3m.params.accuracy = accuracy; } -int p3m_set_params(double r_cut, const int *mesh, int cao, double alpha, - double accuracy) { - if (coulomb.method != COULOMB_P3M && coulomb.method != COULOMB_ELC_P3M && - coulomb.method != COULOMB_P3M_GPU) - coulomb.method = COULOMB_P3M; - +void p3m_set_params(double r_cut, const int *mesh, int cao, double alpha, + double accuracy) { if (r_cut < 0) - return -1; + throw std::runtime_error("P3M: invalid r_cut"); + + if (mesh[0] < 0 || mesh[1] < 0 || mesh[2] < 0) + throw std::runtime_error("P3M: invalid mesh size"); + + if (cao < 1 || cao > 7) + throw std::runtime_error("P3M: invalid cao"); + + if (cao > mesh[0] || cao > mesh[1] || cao > mesh[2]) + throw 
std::runtime_error("P3M: cao larger than mesh size"); - if ((mesh[0] < 0) || (mesh[1] < 0) || (mesh[2] < 0)) - return -2; + if (alpha <= 0.0 && alpha != -1.0) + throw std::runtime_error("P3M: invalid alpha"); - if (cao < 1 || cao > 7 || cao > mesh[0] || cao > mesh[1] || cao > mesh[2]) - return -3; + if (accuracy <= 0.0 && accuracy != -1.0) + throw std::runtime_error("P3M: invalid accuracy"); + + if (coulomb.method != COULOMB_P3M && coulomb.method != COULOMB_ELC_P3M && + coulomb.method != COULOMB_P3M_GPU) + coulomb.method = COULOMB_P3M; p3m.params.r_cut = r_cut; p3m.params.r_cut_iL = r_cut * (1. / box_geo.length()[0]); @@ -254,42 +265,31 @@ int p3m_set_params(double r_cut, const int *mesh, int cao, double alpha, p3m.params.mesh[1] = mesh[1]; p3m.params.mesh[0] = mesh[0]; p3m.params.cao = cao; - - if (alpha > 0) { - p3m.params.alpha = alpha; - p3m.params.alpha_L = alpha * box_geo.length()[0]; - } else if (alpha != -1.0) - return -4; - - if (accuracy >= 0) - p3m.params.accuracy = accuracy; - else if (accuracy != -1.0) - return -5; + p3m.params.alpha = alpha; + p3m.params.alpha_L = alpha * box_geo.length()[0]; + p3m.params.accuracy = accuracy; mpi_bcast_coulomb_params(); - - return 0; } -int p3m_set_mesh_offset(double x, double y, double z) { +void p3m_set_mesh_offset(double x, double y, double z) { + if (x == -1.0 && y == -1.0 && z == -1.0) + return; + if (x < 0.0 || x > 1.0 || y < 0.0 || y > 1.0 || z < 0.0 || z > 1.0) - return ES_ERROR; + throw std::runtime_error("P3M: invalid mesh offset"); p3m.params.mesh_off[0] = x; p3m.params.mesh_off[1] = y; p3m.params.mesh_off[2] = z; mpi_bcast_coulomb_params(); - - return ES_OK; } -int p3m_set_eps(double eps) { +void p3m_set_eps(double eps) { p3m.params.epsilon = eps; mpi_bcast_coulomb_params(); - - return ES_OK; } namespace { diff --git a/src/core/electrostatics_magnetostatics/p3m.hpp b/src/core/electrostatics_magnetostatics/p3m.hpp index 4433af82a7b..56859930bb5 100644 --- a/src/core/electrostatics_magnetostatics/p3m.hpp +++ 
b/src/core/electrostatics_magnetostatics/p3m.hpp @@ -61,9 +61,9 @@ struct p3m_data_struct : public p3m_data_struct_base { /** local mesh. */ p3m_local_mesh local_mesh; - /** real space mesh (local) for CA/FFT.*/ + /** real space mesh (local) for CA/FFT. */ fft_vector rs_mesh; - /** mesh (local) for the electric field.*/ + /** mesh (local) for the electric field. */ std::array, 3> E_mesh; /** number of charged particles (only on master node). */ @@ -197,10 +197,9 @@ inline void p3m_add_pair_force(double q1q2, Utils::Vector3d const &d, * @param[in] r_cut @copybrief P3MParameters::r_cut * @param[in] mesh @copybrief P3MParameters::mesh * @param[in] cao @copybrief P3MParameters::cao - * @param[in] alpha @copybrief P3MParameters::alpha * @param[in] accuracy @copybrief P3MParameters::accuracy */ -void p3m_set_tune_params(double r_cut, const int mesh[3], int cao, double alpha, +void p3m_set_tune_params(double r_cut, const int mesh[3], int cao, double accuracy); /** Set custom parameters @@ -210,23 +209,22 @@ void p3m_set_tune_params(double r_cut, const int mesh[3], int cao, double alpha, * @param[in] cao @copybrief P3MParameters::cao * @param[in] alpha @copybrief P3MParameters::alpha * @param[in] accuracy @copybrief P3MParameters::accuracy - * @return Custom error code */ -int p3m_set_params(double r_cut, const int *mesh, int cao, double alpha, - double accuracy); +void p3m_set_params(double r_cut, const int *mesh, int cao, double alpha, + double accuracy); /** Set mesh offset * * @param[in] x , y , z Components of @ref P3MParameters::mesh_off * "mesh_off" */ -int p3m_set_mesh_offset(double x, double y, double z); +void p3m_set_mesh_offset(double x, double y, double z); /** Set @ref P3MParameters::epsilon "epsilon" parameter * * @param[in] eps @copybrief P3MParameters::epsilon */ -int p3m_set_eps(double eps); +void p3m_set_eps(double eps); /** Calculate real space contribution of Coulomb pair energy. 
*/ inline double p3m_pair_energy(double chgfac, double dist) { diff --git a/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu b/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu index 63516a28146..72c2ac6a687 100644 --- a/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu +++ b/src/core/electrostatics_magnetostatics/p3m_gpu_cuda.cu @@ -54,7 +54,7 @@ #include "BoxGeometry.hpp" #include "EspressoSystemInterface.hpp" #include "cuda_interface.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "electrostatics_magnetostatics/coulomb.hpp" #include "global.hpp" @@ -68,6 +68,7 @@ #include #include +#include #if defined(OMPI_MPI_H) || defined(_MPI_H) #error CU-file includes mpi.h! This should not happen! @@ -404,7 +405,7 @@ void assign_charges(const CUDA_particle_data *const pdata, const P3MGpuData p) { default: break; } - _cuda_check_errors(block, grid, "assign_charge", __FILE__, __LINE__); + cuda_check_errors_exit(block, grid, "assign_charge", __FILE__, __LINE__); } template @@ -549,7 +550,7 @@ void assign_forces(const CUDA_particle_data *const pdata, const P3MGpuData p, default: break; } - _cuda_check_errors(block, grid, "assign_forces", __FILE__, __LINE__); + cuda_check_errors_exit(block, grid, "assign_forces", __FILE__, __LINE__); } /* Init the internal data structures of the P3M GPU. @@ -559,6 +560,9 @@ void assign_forces(const CUDA_particle_data *const pdata, const P3MGpuData p, * is (cuFFT convention) Nx x Ny x [ Nz /2 + 1 ]. 
*/ void p3m_gpu_init(int cao, const int mesh[3], double alpha) { + if (mesh[0] == -1 && mesh[1] == -1 && mesh[2] == -1) + throw std::runtime_error("P3M: invalid mesh size"); + espressoSystemInterface.requestParticleStructGpu(); bool reinit_if = false, mesh_changed = false; diff --git a/src/core/electrostatics_magnetostatics/p3m_gpu_error_cuda.cu b/src/core/electrostatics_magnetostatics/p3m_gpu_error_cuda.cu index dae80628d23..8015e624d18 100644 --- a/src/core/electrostatics_magnetostatics/p3m_gpu_error_cuda.cu +++ b/src/core/electrostatics_magnetostatics/p3m_gpu_error_cuda.cu @@ -25,7 +25,7 @@ #include "p3m_gpu_error.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include #include diff --git a/src/core/event.cpp b/src/core/event.cpp index 08f5af1372d..4e8f7f6a369 100644 --- a/src/core/event.cpp +++ b/src/core/event.cpp @@ -32,6 +32,7 @@ #include "config.hpp" #include "cuda_init.hpp" #include "cuda_interface.hpp" +#include "cuda_utils.hpp" #include "electrostatics_magnetostatics/coulomb.hpp" #include "electrostatics_magnetostatics/dipole.hpp" #include "errorhandling.hpp" @@ -55,6 +56,8 @@ #include +#include + #include /** whether the thermostat has to be reinitialized before integration */ @@ -78,7 +81,11 @@ static int reinit_magnetostatics = false; void on_program_start() { #ifdef CUDA if (this_node == 0) { - cuda_init(); + try { + cuda_init(); + } catch (cuda_runtime_error const &err) { + // pass + } } #endif @@ -87,11 +94,8 @@ void on_program_start() { /* initially go for domain decomposition */ cells_re_init(CELL_STRUCTURE_DOMDEC); - /* - call all initializations to do only on the master node here. 
- */ if (this_node == 0) { - /* interaction_data.c: make sure 0<->0 ia always exists */ + /* make sure interaction 0<->0 always exists */ make_particle_type_exist(0); } } @@ -161,20 +165,20 @@ void on_observable_calc() { Coulomb::on_observable_calc(); reinit_electrostatics = false; } -#endif /*ifdef ELECTROSTATICS */ +#endif /* ELECTROSTATICS */ #ifdef DIPOLES if (reinit_magnetostatics) { Dipole::on_observable_calc(); reinit_magnetostatics = false; } -#endif /*ifdef ELECTROSTATICS */ +#endif /* DIPOLES */ #ifdef ELECTROKINETICS if (ek_initialized) { ek_integrate_electrostatics(); } -#endif +#endif /* ELECTROKINETICS */ clear_particle_node(); } @@ -236,10 +240,10 @@ void on_lbboundary_change() { void on_boxl_change() { grid_changed_box_l(box_geo); /* Electrostatics cutoffs mostly depend on the system size, - therefore recalculate them. */ + * therefore recalculate them. */ cells_re_init(cell_structure.decomposition_type()); -/* Now give methods a chance to react to the change in box length */ + /* Now give methods a chance to react to the change in box length */ #ifdef ELECTROSTATICS Coulomb::on_boxl_change(); #endif @@ -257,9 +261,9 @@ void on_boxl_change() { void on_cell_structure_change() { clear_particle_node(); -/* Now give methods a chance to react to the change in cell - structure. Most ES methods need to reinitialize, as they depend - on skin, node grid and so on. */ + /* Now give methods a chance to react to the change in cell + * structure. Most ES methods need to reinitialize, as they depend + * on skin, node grid and so on. 
*/ #ifdef ELECTROSTATICS Coulomb::init(); #endif /* ifdef ELECTROSTATICS */ diff --git a/src/core/forces.cpp b/src/core/forces.cpp index d1ea3de495f..a7f9d0d9fe2 100644 --- a/src/core/forces.cpp +++ b/src/core/forces.cpp @@ -96,7 +96,7 @@ void force_calc(CellStructure &cell_structure, double time_step) { auto particles = cell_structure.local_particles(); auto ghost_particles = cell_structure.ghost_particles(); #ifdef ELECTROSTATICS - iccp3m_iteration(particles, cell_structure.ghost_particles()); + icc_iteration(particles, cell_structure.ghost_particles()); #endif init_forces(particles, time_step); diff --git a/src/core/grid_based_algorithms/electrokinetics.hpp b/src/core/grid_based_algorithms/electrokinetics.hpp index a86b29a9b79..00110e1e9af 100644 --- a/src/core/grid_based_algorithms/electrokinetics.hpp +++ b/src/core/grid_based_algorithms/electrokinetics.hpp @@ -142,7 +142,6 @@ void ek_integrate(); void ek_integrate_electrostatics(); void ek_print_parameters(); void ek_print_lbpar(); -void lb_set_ek_pointer(EK_parameters *pointeradress); unsigned int ek_calculate_boundary_mass(); int ek_print_vtk_density(int species, char *filename); int ek_print_vtk_flux(int species, char *filename); diff --git a/src/core/grid_based_algorithms/electrokinetics_cuda.cu b/src/core/grid_based_algorithms/electrokinetics_cuda.cu index f618dd4351c..d1ae96921a0 100644 --- a/src/core/grid_based_algorithms/electrokinetics_cuda.cu +++ b/src/core/grid_based_algorithms/electrokinetics_cuda.cu @@ -25,7 +25,7 @@ #include "grid_based_algorithms/electrokinetics.hpp" #include "cuda_interface.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "errorhandling.hpp" #include "fd-electrostatics.cuh" #include "grid_based_algorithms/lb_boundaries.hpp" @@ -36,7 +36,6 @@ #include #include -#include #include #include @@ -50,17 +49,14 @@ #include #include #include +#include #if defined(OMPI_MPI_H) || defined(_MPI_H) #error CU-file includes mpi.h! This should not happen! 
#endif -/* TODO: get rid of this code duplication with lb-boundaries.h by solving the - cuda-mpi incompatibility */ - extern ActiveLB lattice_switch; extern bool ek_initialized; -EK_parameters *lb_ek_parameters_gpu; // Used to limit register use for the pressure calculation #define EK_LINK_U00_pressure 0 @@ -73,9 +69,8 @@ EK_parameters *lb_ek_parameters_gpu; #ifdef EK_BOUNDARIES void LBBoundaries::lb_init_boundaries(); #endif -/* end of code duplication */ -#define PI_FLOAT 3.14159265358979323846f +static constexpr unsigned int threads_per_block = 64; EK_parameters ek_parameters = { // agrid @@ -156,6 +151,10 @@ EK_parameters ek_parameters = { nullptr, // lb_force_density_previous nullptr, +#ifdef EK_DEBUG + // j_fluc + nullptr, +#endif // rho {nullptr}, // species_index @@ -176,7 +175,6 @@ EK_parameters ek_parameters = { __device__ __constant__ EK_parameters ek_parameters_gpu[1]; ekfloat *charge_gpu; -EK_parameters *ek_parameters_gpu_pointer; LB_parameters_gpu *ek_lbparameters_gpu; CUDA_particle_data *particle_data_gpu; float *ek_lb_boundary_force; @@ -255,26 +253,6 @@ __device__ unsigned int rhoindex_cartesian2linear_padded(unsigned int x, y * ek_parameters_gpu->dim_x_padded + x; } -__device__ void jindex_linear2cartesian(unsigned int index, unsigned int *coord, - unsigned int *c) { - - coord[0] = index % ek_parameters_gpu->dim_x; - index /= ek_parameters_gpu->dim_x; - coord[1] = index % ek_parameters_gpu->dim_y; - index /= ek_parameters_gpu->dim_y; - coord[2] = index % ek_parameters_gpu->dim_z; - *c = index / ek_parameters_gpu->dim_z; -} - -__device__ unsigned int jindex_cartesian2linear(unsigned int x, unsigned int y, - unsigned int z, - unsigned int c) { - - return c * ek_parameters_gpu->number_of_nodes + - z * ek_parameters_gpu->dim_y * ek_parameters_gpu->dim_x + - y * ek_parameters_gpu->dim_x + x; -} - // TODO fluxindex fastest running might improve caching __device__ unsigned int jindex_getByRhoLinear(unsigned int rho_index, unsigned int c) { @@ -291,8 
+269,9 @@ __device__ void ek_displacement(float *dx, LB_nodes_gpu n, float mode[19]; - for (int i = 0; i < 19; i++) { - mode[i] = n.vd[i * ek_lbparameters_gpu->number_of_nodes + node_index]; + for (unsigned i = 0; i < 19; i++) { + mode[i] = + n.populations[i * ek_lbparameters_gpu->number_of_nodes + node_index]; } rho += mode[0] + mode[1] + mode[2] + mode[3] + mode[4] + mode[5] + mode[6] + @@ -1171,14 +1150,14 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, LB_node_force_density_gpu node_f, LB_nodes_gpu lb_node, LB_parameters_gpu *ek_lbparameters_gpu) { float dx[3]; - int di[3]; + unsigned int di[3]; unsigned int node; ek_displacement(dx, lb_node, index, ek_lbparameters_gpu); - di[0] = 1 - signbit(dx[0]); - di[1] = 1 - signbit(dx[1]); - di[2] = 1 - signbit(dx[2]); + di[0] = 1 - static_cast(signbit(dx[0])); + di[1] = 1 - static_cast(signbit(dx[1])); + di[2] = 1 - static_cast(signbit(dx[2])); dx[0] = fabs(dx[0]); dx[1] = fabs(dx[1]); @@ -1194,8 +1173,7 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, ek_parameters_gpu->dim_x, coord[1], coord[2]); - target_node[0] = (coord[0] + 2 * static_cast(di[0]) - 1 + - ek_parameters_gpu->dim_x) % + target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x; target_node[1] = coord[1]; target_node[2] = coord[2]; @@ -1218,8 +1196,7 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, coord[2]); target_node[0] = coord[0]; - target_node[1] = (coord[1] + 2 * static_cast(di[1]) - 1 + - ek_parameters_gpu->dim_y) % + target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y; target_node[2] = coord[2]; target_node_index = @@ -1240,8 +1217,7 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, target_node[0] = coord[0]; target_node[1] = coord[1]; - target_node[2] = (coord[2] + 2 * static_cast(di[2]) - 1 + - ek_parameters_gpu->dim_z) % + target_node[2] = 
(coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z; target_node_index = rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); @@ -1262,11 +1238,9 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, ek_parameters_gpu->dim_z); target_node[0] = coord[0]; - target_node[1] = (coord[1] + 2 * static_cast(di[1]) - 1 + - ek_parameters_gpu->dim_y) % + target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y; - target_node[2] = (coord[2] + 2 * static_cast(di[2]) - 1 + - ek_parameters_gpu->dim_z) % + target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z; target_node_index = rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); @@ -1288,12 +1262,10 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, (coord[2] + (1 - di[0]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - target_node[0] = (coord[0] + 2 * static_cast(di[0]) - 1 + - ek_parameters_gpu->dim_x) % + target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x; target_node[1] = coord[1]; - target_node[2] = (coord[2] + 2 * static_cast(di[2]) - 1 + - ek_parameters_gpu->dim_z) % + target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z; target_node_index = rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); @@ -1315,11 +1287,9 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, ek_parameters_gpu->dim_y, coord[2]); - target_node[0] = (coord[0] + 2 * static_cast(di[0]) - 1 + - ek_parameters_gpu->dim_x) % + target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x; - target_node[1] = (coord[1] + 2 * static_cast(di[1]) - 1 + - ek_parameters_gpu->dim_y) % + target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % 
ek_parameters_gpu->dim_y; target_node[2] = coord[2]; target_node_index = @@ -1343,14 +1313,11 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, (coord[2] + (1 - di[0]) * (2 * di[2] - 1) + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z); - target_node[0] = (coord[0] + 2 * static_cast(di[0]) - 1 + - ek_parameters_gpu->dim_x) % + target_node[0] = (coord[0] + 2 * di[0] - 1 + ek_parameters_gpu->dim_x) % ek_parameters_gpu->dim_x; - target_node[1] = (coord[1] + 2 * static_cast(di[1]) - 1 + - ek_parameters_gpu->dim_y) % + target_node[1] = (coord[1] + 2 * di[1] - 1 + ek_parameters_gpu->dim_y) % ek_parameters_gpu->dim_y; - target_node[2] = (coord[2] + 2 * static_cast(di[2]) - 1 + - ek_parameters_gpu->dim_z) % + target_node[2] = (coord[2] + 2 * di[2] - 1 + ek_parameters_gpu->dim_z) % ek_parameters_gpu->dim_z; target_node_index = rhoindex_cartesian2linear(target_node[0], target_node[1], target_node[2]); @@ -1360,9 +1327,9 @@ ek_add_advection_to_flux(unsigned int index, unsigned int *neighborindex, atomicAdd(&ek_parameters_gpu->j[jindex_getByRhoLinear( node, (1 - di[0]) * (EK_LINK_UUU + 2 * di[1] + di[2]) + di[0] * (EK_LINK_UDD - 2 * di[1] - di[2]))], - static_cast(2 * di[0] - 1) * + (2 * static_cast(di[0]) - 1) * ek_parameters_gpu->rho[species_index][index] * dx[0] * dx[1] * - dx[2] * static_cast(not_boundary)); + dx[2] * static_cast(not_boundary)); } __device__ float4 ek_random_wrapper_philox(unsigned int index, @@ -1405,7 +1372,7 @@ __device__ void ek_add_fluctuations_to_flux(unsigned int index, #endif float fluc = 0.0f; - for (int i = 0; i < 9; i++) { + for (unsigned i = 0; i < 9; i++) { if (i % 4 == 0) { random_floats = ek_random_wrapper_philox(index, i + 40, philox_counter); @@ -1804,7 +1771,7 @@ __global__ void ek_apply_boundaries(unsigned int species_index, (coord[2] + 1) % ek_parameters_gpu->dim_z); /* Clear fluxes on links connecting a boundary node */ - for (int i = 0; i < 13; i++) + for (unsigned i = 0; i < 13; i++) 
ek_parameters_gpu->j[jindex_getByRhoLinear(index, i)] = 0.0f; ek_parameters_gpu->j[jindex_getByRhoLinear( @@ -1841,7 +1808,7 @@ __global__ void ek_clear_fluxes() { unsigned int index = ek_getThreadIndex(); if (index < ek_parameters_gpu->number_of_nodes) { - for (int i = 0; i < 13; i++) { + for (unsigned i = 0; i < 13; i++) { ek_parameters_gpu->j[jindex_getByRhoLinear(index, i)] = 0.0f; #ifdef EK_DEBUG ek_parameters_gpu->j_fluc[jindex_getByRhoLinear(index, i)] = 0.0f; @@ -1883,32 +1850,29 @@ ek_gather_particle_charge_density(CUDA_particle_data *particle_data, size_t number_of_particles, LB_parameters_gpu *ek_lbparameters_gpu) { unsigned int index = ek_getThreadIndex(); - int lowernode[3]; + unsigned int lowernode[3]; float cellpos[3]; float gridpos; if (index < number_of_particles) { gridpos = particle_data[index].p[0] / ek_parameters_gpu->agrid - 0.5f; - lowernode[0] = (int)floorf(gridpos); + lowernode[0] = static_cast(floorf(gridpos)); cellpos[0] = gridpos - static_cast(lowernode[0]); gridpos = particle_data[index].p[1] / ek_parameters_gpu->agrid - 0.5f; - lowernode[1] = (int)floorf(gridpos); + lowernode[1] = static_cast(floorf(gridpos)); cellpos[1] = gridpos - static_cast(lowernode[1]); gridpos = particle_data[index].p[2] / ek_parameters_gpu->agrid - 0.5f; - lowernode[2] = (int)floorf(gridpos); + lowernode[2] = static_cast(floorf(gridpos)); cellpos[2] = gridpos - static_cast(lowernode[2]); - lowernode[0] = - static_cast((lowernode[0] + ek_lbparameters_gpu->dim_x) % - ek_lbparameters_gpu->dim_x); - lowernode[1] = - static_cast((lowernode[1] + ek_lbparameters_gpu->dim_y) % - ek_lbparameters_gpu->dim_y); - lowernode[2] = - static_cast((lowernode[2] + ek_lbparameters_gpu->dim_z) % - ek_lbparameters_gpu->dim_z); + lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim_x) % + ek_lbparameters_gpu->dim_x; + lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim_y) % + ek_lbparameters_gpu->dim_y; + lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim_z) % + 
ek_lbparameters_gpu->dim_z; atomicAdd(&((cufftReal *)ek_parameters_gpu ->charge_potential)[rhoindex_cartesian2linear_padded( @@ -1973,32 +1937,29 @@ ek_spread_particle_force(CUDA_particle_data *particle_data, LB_parameters_gpu *ek_lbparameters_gpu) { unsigned int index = ek_getThreadIndex(); - int lowernode[3]; + unsigned int lowernode[3]; float cellpos[3]; float gridpos; if (index < number_of_particles) { gridpos = particle_data[index].p[0] / ek_parameters_gpu->agrid - 0.5f; - lowernode[0] = (int)floorf(gridpos); - cellpos[0] = gridpos - (float)(lowernode[0]); + lowernode[0] = static_cast(floorf(gridpos)); + cellpos[0] = gridpos - static_cast(lowernode[0]); gridpos = particle_data[index].p[1] / ek_parameters_gpu->agrid - 0.5f; - lowernode[1] = (int)floorf(gridpos); - cellpos[1] = gridpos - (float)(lowernode[1]); + lowernode[1] = static_cast(floorf(gridpos)); + cellpos[1] = gridpos - static_cast(lowernode[1]); gridpos = particle_data[index].p[2] / ek_parameters_gpu->agrid - 0.5f; - lowernode[2] = (int)floorf(gridpos); - cellpos[2] = gridpos - (float)(lowernode[2]); - - lowernode[0] = - static_cast((lowernode[0] + ek_lbparameters_gpu->dim_x) % - ek_lbparameters_gpu->dim_x); - lowernode[1] = - static_cast((lowernode[1] + ek_lbparameters_gpu->dim_y) % - ek_lbparameters_gpu->dim_y); - lowernode[2] = - static_cast((lowernode[2] + ek_lbparameters_gpu->dim_z) % - ek_lbparameters_gpu->dim_z); + lowernode[2] = static_cast(floorf(gridpos)); + cellpos[2] = gridpos - static_cast(lowernode[2]); + + lowernode[0] = (lowernode[0] + ek_lbparameters_gpu->dim_x) % + ek_lbparameters_gpu->dim_x; + lowernode[1] = (lowernode[1] + ek_lbparameters_gpu->dim_y) % + ek_lbparameters_gpu->dim_y; + lowernode[2] = (lowernode[2] + ek_lbparameters_gpu->dim_z) % + ek_lbparameters_gpu->dim_z; float efield[3] = {0., 0., 0.}; for (unsigned int dim = 0; dim < 3; ++dim) { @@ -2149,7 +2110,7 @@ __global__ void ek_calculate_system_charge(ekfloat *charge_gpu) { } // TODO delete ?? 
(it has the previous step setting now) -// This is not compatible with external LB force_densitys! +// This is not compatible with external LB force_densities! __global__ void ek_clear_node_force(LB_node_force_density_gpu node_f) { unsigned int index = ek_getThreadIndex(); @@ -2173,17 +2134,13 @@ __global__ void ek_clear_node_force(LB_node_force_density_gpu node_f) { } void ek_calculate_electrostatic_coupling() { - const int blocks_per_grid_y = 4; - const int threads_per_block = 64; if ((!ek_parameters.es_coupling) || (!ek_initialized)) return; auto device_particles = gpu_get_particle_pointer(); - auto blocks_per_grid_x = static_cast( - (device_particles.size() + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = calculate_dim_grid( + static_cast(device_particles.size()), 4, threads_per_block); KERNELCALL(ek_spread_particle_force, dim_grid, threads_per_block, device_particles.data(), device_particles.size(), @@ -2192,13 +2149,8 @@ void ek_calculate_electrostatic_coupling() { void ek_integrate_electrostatics() { - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); KERNELCALL(ek_gather_species_charge_density, dim_grid, threads_per_block); @@ -2215,13 +2167,10 @@ void ek_integrate_electrostatics() { } auto device_particles = gpu_get_particle_pointer(); - if (not device_particles - .empty()) // TODO make it an if number_of_charged_particles != 0 - { - blocks_per_grid_x = static_cast( - (device_particles.size() + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim_grid = 
make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + // TODO make it an if number_of_charged_particles != 0 + if (not device_particles.empty()) { + dim_grid = calculate_dim_grid( + static_cast(device_particles.size()), 4, threads_per_block); particle_data_gpu = device_particles.data(); @@ -2233,14 +2182,8 @@ void ek_integrate_electrostatics() { } void ek_integrate() { - /** values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); /* Clears the force on the nodes and must be called before fluxes are calculated, since in the reaction set up the previous-step LB force is @@ -2251,7 +2194,7 @@ void ek_integrate() { // KERNELCALL( ek_clear_node_force, dim_grid, threads_per_block, node_f ); /* Integrate diffusion-advection */ - for (int i = 0; i < ek_parameters.number_of_species; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_species; i++) { KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, i, *current_nodes, node_f, ek_lbparameters_gpu, ek_lb_device_values, @@ -2281,13 +2224,8 @@ void ek_gather_wallcharge_species_density(ekfloat *wallcharge_species_density, } void ek_init_species_density_wallcharge(ekfloat *wallcharge_species_density, int wallcharge_species) { - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, 
threads_per_block); KERNELCALL(ek_clear_boundary_densities, dim_grid, threads_per_block, *current_nodes); @@ -2339,19 +2277,7 @@ int ek_init() { return 1; } - int threads_per_block = 64; - int blocks_per_grid_y = 4; - int blocks_per_grid_x; - dim3 dim_grid; - if (!ek_initialized) { - if (cudaGetSymbolAddress((void **)&ek_parameters_gpu_pointer, - ek_parameters_gpu) != cudaSuccess) { - fprintf(stderr, "ERROR: Fetching constant memory pointer\n"); - - return 1; - } - for (auto &val : ek_parameters.species_index) { val = -1; } @@ -2374,10 +2300,10 @@ int ek_init() { lb_lbcoupling_set_gamma(ek_parameters.friction); // Convert the density (given in MD units) to LB units - lbpar_gpu.rho = (ek_parameters.lb_density < 0.0 - ? 1.0f - : ek_parameters.lb_density * - Utils::int_pow<3>(ek_parameters.agrid)); + lbpar_gpu.rho = + (ek_parameters.lb_density < 0.0) + ? 1.0f + : ek_parameters.lb_density * Utils::int_pow<3>(ek_parameters.agrid); lbpar_gpu.is_TRT = true; @@ -2436,7 +2362,6 @@ int ek_init() { sizeof(EK_parameters))); lb_get_para_pointer(&ek_lbparameters_gpu); - lb_set_ek_pointer(ek_parameters_gpu_pointer); cuda_safe_mem( cudaMalloc((void **)&ek_parameters.lb_force_density_previous, @@ -2491,11 +2416,9 @@ int ek_init() { sizeof(EK_parameters))); // clear initial LB force and finish up - blocks_per_grid_x = static_cast( - (ek_parameters.dim_z * ek_parameters.dim_y * (ek_parameters.dim_x) + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = calculate_dim_grid( + ek_parameters.dim_z * ek_parameters.dim_y * ek_parameters.dim_x, 4, + threads_per_block); KERNELCALL(ek_clear_node_force, dim_grid, threads_per_block, node_f); ek_initialized = true; @@ -2518,11 +2441,8 @@ int ek_init() { cuda_safe_mem(cudaMemcpyToSymbol(ek_parameters_gpu, &ek_parameters, sizeof(EK_parameters))); - blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - 
threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); KERNELCALL(ek_init_species_density_homogeneous, dim_grid, threads_per_block); @@ -2540,24 +2460,17 @@ int ek_init() { return 0; } -void lb_set_ek_pointer(EK_parameters *pointeradress) { - lb_ek_parameters_gpu = pointeradress; -} - unsigned int ek_calculate_boundary_mass() { - auto *bound_array = (unsigned int *)Utils::malloc(lbpar_gpu.number_of_nodes * - sizeof(unsigned int)); + std::vector bound_array(lbpar_gpu.number_of_nodes); - lb_get_boundary_flags_GPU(bound_array); + lb_get_boundary_flags_GPU(bound_array.data()); unsigned int boundary_node_number = 0; - for (int j = 0; j < ek_parameters.number_of_nodes; j++) + for (unsigned j = 0; j < ek_parameters.number_of_nodes; j++) if (bound_array[j] != 0) boundary_node_number++; - free(bound_array); - return boundary_node_number; } @@ -2571,10 +2484,7 @@ void rhoindex_linear2cartesian_host(unsigned int index, unsigned int *coord) { unsigned int jindex_cartesian2linear_host(unsigned int x, unsigned int y, unsigned int z, unsigned int c) { - - x = (x + ek_parameters.dim_x) % - ek_parameters - .dim_x; // this does not happen in the GPU version of this function + x = (x + ek_parameters.dim_x) % ek_parameters.dim_x; y = (y + ek_parameters.dim_y) % ek_parameters.dim_y; z = (z + ek_parameters.dim_z) % ek_parameters.dim_z; @@ -2604,9 +2514,8 @@ int ek_lb_print_vtk_velocity(char *filename) { return 1; } - auto *host_values = (LB_rho_v_pi_gpu *)Utils::malloc( - lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu)); - lb_get_values_GPU(host_values); + std::vector host_values(lbpar_gpu.number_of_nodes); + lb_get_values_GPU(host_values.data()); auto const lattice_speed = lbpar_gpu.agrid / lbpar_gpu.tau; fprintf(fp, "\ # vtk DataFile Version 2.0\n\ @@ -2625,35 +2534,30 @@ LOOKUP_TABLE 
default\n", lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); - for (int i = 0; i < lbpar_gpu.number_of_nodes; i++) { - fprintf(fp, "%e %e %e ", host_values[i].v[0] * lattice_speed, + for (unsigned i = 0; i < lbpar_gpu.number_of_nodes; i++) { + fprintf(fp, "%e %e %e\n", host_values[i].v[0] * lattice_speed, host_values[i].v[1] * lattice_speed, host_values[i].v[2] * lattice_speed); } - free(host_values); fclose(fp); return 0; } -int ek_node_print_velocity( - int x, int y, int z, - double *velocity) { // TODO only calculate single node velocity +int ek_node_print_velocity(int x, int y, int z, double *velocity) { + // TODO: only calculate single node velocity + std::vector host_values(lbpar_gpu.number_of_nodes); + lb_get_values_GPU(host_values.data()); - auto *host_values = (LB_rho_v_pi_gpu *)Utils::malloc( - lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu)); - lb_get_values_GPU(host_values); - - auto const i = z * ek_parameters.dim_y * ek_parameters.dim_x + - y * ek_parameters.dim_x + x; + auto const index = + static_cast(z) * ek_parameters.dim_y * ek_parameters.dim_x + + static_cast(y) * ek_parameters.dim_x + static_cast(x); auto const lattice_speed = lbpar_gpu.agrid / lbpar_gpu.tau; - velocity[0] = host_values[i].v[0] * lattice_speed; - velocity[1] = host_values[i].v[1] * lattice_speed; - velocity[2] = host_values[i].v[2] * lattice_speed; - - free(host_values); + velocity[0] = host_values[index].v[0] * lattice_speed; + velocity[1] = host_values[index].v[1] * lattice_speed; + velocity[2] = host_values[index].v[2] * lattice_speed; return 0; } @@ -2666,9 +2570,8 @@ int ek_lb_print_vtk_density(char *filename) { return 1; } - auto *host_values = (LB_rho_v_pi_gpu *)Utils::malloc( - lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu)); - lb_get_values_GPU(host_values); + std::vector host_values(lbpar_gpu.number_of_nodes); + lb_get_values_GPU(host_values.data()); fprintf(fp, "\ # vtk DataFile Version 2.0\n\ @@ -2688,11 
+2591,10 @@ LOOKUP_TABLE default\n", lbpar_gpu.agrid * 0.5f, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.agrid, lbpar_gpu.number_of_nodes); auto const agrid = lb_lbfluid_get_agrid(); - for (int i = 0; i < lbpar_gpu.number_of_nodes; i++) { - fprintf(fp, "%e ", host_values[i].rho / agrid / agrid / agrid); + for (unsigned i = 0; i < lbpar_gpu.number_of_nodes; i++) { + fprintf(fp, "%e\n", host_values[i].rho / agrid / agrid / agrid); } - free(host_values); fclose(fp); return 0; @@ -2710,12 +2612,11 @@ int ek_print_vtk_density(int species, char *filename) { return 1; } - auto *densities = - (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * sizeof(ekfloat)); + std::vector densities(ek_parameters.number_of_nodes); cuda_safe_mem(cudaMemcpy( - densities, ek_parameters.rho[ek_parameters.species_index[species]], - ek_parameters.number_of_nodes * sizeof(ekfloat), cudaMemcpyDeviceToHost)); + densities.data(), ek_parameters.rho[ek_parameters.species_index[species]], + densities.size() * sizeof(ekfloat), cudaMemcpyDeviceToHost)); fprintf(fp, "\ # vtk DataFile Version 2.0\n\ @@ -2736,11 +2637,10 @@ LOOKUP_TABLE default\n", ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes, species); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { fprintf(fp, "%e\n", densities[i] / Utils::int_pow<3>(ek_parameters.agrid)); } - free(densities); fclose(fp); return 0; @@ -2752,18 +2652,16 @@ int ek_node_print_density(int species, int x, int y, int z, double *density) { return 1; } - auto *densities = - (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * sizeof(ekfloat)); + std::vector densities(ek_parameters.number_of_nodes); cuda_safe_mem(cudaMemcpy( - densities, ek_parameters.rho[ek_parameters.species_index[species]], - ek_parameters.number_of_nodes * sizeof(ekfloat), cudaMemcpyDeviceToHost)); + densities.data(), ek_parameters.rho[ek_parameters.species_index[species]], + 
densities.size() * sizeof(ekfloat), cudaMemcpyDeviceToHost)); - *density = densities[z * ek_parameters.dim_y * ek_parameters.dim_x + - y * ek_parameters.dim_x + x] / - Utils::int_pow<3>(ek_parameters.agrid); - - free(densities); + auto const index = + static_cast(z) * ek_parameters.dim_y * ek_parameters.dim_x + + static_cast(y) * ek_parameters.dim_x + static_cast(x); + *density = densities[index] / Utils::int_pow<3>(ek_parameters.agrid); return 0; } @@ -2778,34 +2676,30 @@ int ek_node_print_flux(int species, int x, int y, int z, double *flux) { // into Cartesian coordinates for output unsigned int coord[3]; - coord[0] = x; - coord[1] = y; - coord[2] = z; + coord[0] = static_cast(x); + coord[1] = static_cast(y); + coord[2] = static_cast(z); - auto *fluxes = (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * 13 * - sizeof(ekfloat)); + std::vector fluxes(ek_parameters.number_of_nodes * 13); - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f, - ek_lbparameters_gpu, ek_lb_device_values, philox_counter.value()); + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f, ek_lbparameters_gpu, ek_lb_device_values, + philox_counter.value()); reset_LB_force_densities_GPU(false); #ifdef EK_BOUNDARIES KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f); + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f); #endif - 
cuda_safe_mem(cudaMemcpy(fluxes, ek_parameters.j, - ek_parameters.number_of_nodes * 13 * sizeof(ekfloat), + cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j, + fluxes.size() * sizeof(ekfloat), cudaMemcpyDeviceToHost)); auto const i = rhoindex_cartesian2linear_host(coord[0], coord[1], coord[2]); @@ -2970,24 +2864,24 @@ int ek_node_print_flux(int species, int x, int y, int z, double *flux) { flux[2] = flux_local_cartesian[2] / (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid)); - free(fluxes); - return 0; } int ek_node_set_density(int species, int x, int y, int z, double density) { - if (ek_parameters.species_index[species] != -1) { - auto index = - static_cast(z * ek_parameters.dim_y * ek_parameters.dim_x + - y * ek_parameters.dim_x + x); - ekfloat num_particles = - static_cast(density) * Utils::int_pow<3>(ek_parameters.agrid); - cuda_safe_mem(cudaMemcpy( - &ek_parameters.rho[ek_parameters.species_index[species]][index], - &num_particles, sizeof(ekfloat), cudaMemcpyHostToDevice)); - } else + if (ek_parameters.species_index[species] == -1) { return 1; + } + + auto const index = + static_cast(z) * ek_parameters.dim_y * ek_parameters.dim_x + + static_cast(y) * ek_parameters.dim_x + static_cast(x); + ekfloat num_particles = + static_cast(density) * Utils::int_pow<3>(ek_parameters.agrid); + + cuda_safe_mem(cudaMemcpy( + &ek_parameters.rho[ek_parameters.species_index[species]][index], + &num_particles, sizeof(ekfloat), cudaMemcpyHostToDevice)); return 0; } @@ -3009,30 +2903,26 @@ int ek_print_vtk_flux(int species, char *filename) { unsigned int coord[3]; - auto *fluxes = (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * 13 * - sizeof(ekfloat)); + std::vector fluxes(ek_parameters.number_of_nodes * 13); - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = 
make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f, - ek_lbparameters_gpu, ek_lb_device_values, philox_counter.value()); + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f, ek_lbparameters_gpu, ek_lb_device_values, + philox_counter.value()); reset_LB_force_densities_GPU(false); #ifdef EK_BOUNDARIES KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f); + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f); #endif - cuda_safe_mem(cudaMemcpy(fluxes, ek_parameters.j, - ek_parameters.number_of_nodes * 13 * sizeof(ekfloat), + cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j, + fluxes.size() * sizeof(ekfloat), cudaMemcpyDeviceToHost)); fprintf(fp, "\ @@ -3054,7 +2944,7 @@ LOOKUP_TABLE default\n", ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes, species); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { rhoindex_linear2cartesian_host(i, coord); flux_local_cartesian[0] = @@ -3219,7 +3109,6 @@ LOOKUP_TABLE default\n", (ek_parameters.time_step * Utils::sqr(ek_parameters.agrid))); } - free(fluxes); fclose(fp); return 0; @@ -3229,6 +3118,10 @@ int ek_print_vtk_flux_fluc(int species, char *filename) { #ifndef EK_DEBUG return 1; #else + if (ek_parameters.species_index[species] == -1) { + return 1; + } + FILE *fp = fopen(filename, "w"); ekfloat flux_local_cartesian[3]; // temporary variable for converting fluxes // into cartesian coordinates for output @@ -3239,39 +3132,30 @@ int ek_print_vtk_flux_fluc(int species, char *filename) { return 1; } - ekfloat 
*fluxes = (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * - 13 * sizeof(ekfloat)); + std::vector fluxes(ek_parameters.number_of_nodes * 13); - if (ek_parameters.species_index[species] != -1) { - int threads_per_block = 64; - int blocks_per_grid_y = 4; - int blocks_per_grid_x = (ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); - KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); - KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f, - ek_lbparameters_gpu, ek_lb_device_values, - philox_counter.value()); - reset_LB_force_densities_GPU(false); + KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); + KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f, ek_lbparameters_gpu, ek_lb_device_values, + philox_counter.value()); + reset_LB_force_densities_GPU(false); #ifdef EK_BOUNDARIES - KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f); + KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, + ek_parameters.species_index[species], *current_nodes, node_f); #endif - cuda_safe_mem( - cudaMemcpy(fluxes, ek_parameters.j_fluc, - ek_parameters.number_of_nodes * 13 * sizeof(ekfloat), - cudaMemcpyDeviceToHost)); - } else - return 1; + cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j_fluc, + fluxes.size() * sizeof(ekfloat), + cudaMemcpyDeviceToHost)); fprintf(fp, "\ # vtk DataFile Version 2.0\n\ -flux_%d\n\ +flux_fluc_%d\n\ ASCII\n\ \n\ DATASET STRUCTURED_POINTS\n\ @@ -3280,7 +3164,7 @@ ORIGIN %f %f %f\n\ SPACING %f %f %f\n\ \n\ POINT_DATA %u\n\ -SCALARS flux_%d float 3\n\ +SCALARS 
flux_fluc_%d float 4\n\ LOOKUP_TABLE default\n", species, ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z, ek_parameters.agrid * 0.5f, @@ -3288,7 +3172,7 @@ LOOKUP_TABLE default\n", ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes, species); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { float flux_local_linksum = 0; rhoindex_linear2cartesian_host(i, coord); @@ -3458,7 +3342,6 @@ LOOKUP_TABLE default\n", flux_local_linksum / (ek_parameters.agrid * ek_parameters.agrid)); } - free(fluxes); fclose(fp); return 0; @@ -3479,35 +3362,31 @@ int ek_print_vtk_flux_link(int species, char *filename) { unsigned int coord[3]; - auto *fluxes = (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * 13 * - sizeof(ekfloat)); + std::vector fluxes(ek_parameters.number_of_nodes * 13); - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); KERNELCALL(ek_clear_fluxes, dim_grid, threads_per_block); KERNELCALL(ek_calculate_quantities, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f, - ek_lbparameters_gpu, ek_lb_device_values, philox_counter.value()); + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f, ek_lbparameters_gpu, ek_lb_device_values, + philox_counter.value()); reset_LB_force_densities_GPU(false); #ifdef EK_BOUNDARIES KERNELCALL(ek_apply_boundaries, dim_grid, threads_per_block, - ek_parameters.species_index[species], *current_nodes, node_f); + static_cast(ek_parameters.species_index[species]), + *current_nodes, node_f); #endif - 
cuda_safe_mem(cudaMemcpy(fluxes, ek_parameters.j, - ek_parameters.number_of_nodes * 13 * sizeof(ekfloat), + cuda_safe_mem(cudaMemcpy(fluxes.data(), ek_parameters.j, + fluxes.size() * sizeof(ekfloat), cudaMemcpyDeviceToHost)); fprintf(fp, "\ # vtk DataFile Version 2.0\n\ -flux_%d\n\ +flux_link_%d\n\ ASCII\n\ \n\ DATASET STRUCTURED_POINTS\n\ @@ -3516,7 +3395,7 @@ ORIGIN %f %f %f\n\ SPACING %f %f %f\n\ \n\ POINT_DATA %u\n\ -SCALARS flux_%d float 3\n\ +SCALARS flux_link_%d float 13\n\ LOOKUP_TABLE default\n", species, ek_parameters.dim_x, ek_parameters.dim_y, ek_parameters.dim_z, ek_parameters.agrid * 0.5f, @@ -3524,10 +3403,10 @@ LOOKUP_TABLE default\n", ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes, species); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { rhoindex_linear2cartesian_host(i, coord); - fprintf(fp, "%e %e %e %e %e %e %e %e %e %e %e %e %e \n", + fprintf(fp, "%e %e %e %e %e %e %e %e %e %e %e %e %e\n", fluxes[jindex_getByRhoLinear_host(i, 0)], fluxes[jindex_getByRhoLinear_host(i, 1)], fluxes[jindex_getByRhoLinear_host(i, 2)], @@ -3543,19 +3422,19 @@ LOOKUP_TABLE default\n", fluxes[jindex_getByRhoLinear_host(i, 12)]); } - free(fluxes); fclose(fp); return 0; } int ek_node_print_potential(int x, int y, int z, double *potential) { - auto i = - static_cast(z * ek_parameters.dim_y * ek_parameters.dim_x_padded + - y * ek_parameters.dim_x_padded + x); + auto const index = static_cast(z) * ek_parameters.dim_y * + ek_parameters.dim_x_padded + + static_cast(y) * ek_parameters.dim_x_padded + + static_cast(x); float pot; - cuda_safe_mem(cudaMemcpy(&pot, &ek_parameters.charge_potential[i], + cuda_safe_mem(cudaMemcpy(&pot, &ek_parameters.charge_potential[index], 1 * sizeof(cufftReal), cudaMemcpyDeviceToHost)); *potential = pot; @@ -3570,15 +3449,14 @@ int ek_print_vtk_potential(char *filename) { return 1; } - auto *potential = - (float 
*)Utils::malloc(ek_parameters.number_of_nodes * sizeof(cufftReal)); + std::vector potential(ek_parameters.number_of_nodes); - cuda_safe_mem(cudaMemcpy2D(potential, ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.charge_potential, - ek_parameters.dim_x_padded * sizeof(cufftReal), - ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.dim_z * ek_parameters.dim_y, - cudaMemcpyDeviceToHost)); + cuda_safe_mem(cudaMemcpy2D( + potential.data(), ek_parameters.dim_x * sizeof(cufftReal), + ek_parameters.charge_potential, + ek_parameters.dim_x_padded * sizeof(cufftReal), + ek_parameters.dim_x * sizeof(cufftReal), + ek_parameters.dim_z * ek_parameters.dim_y, cudaMemcpyDeviceToHost)); fprintf(fp, "\ # vtk DataFile Version 2.0\n\ @@ -3598,11 +3476,10 @@ LOOKUP_TABLE default\n", ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { fprintf(fp, "%e\n", potential[i]); } - free(potential); fclose(fp); return 0; @@ -3616,15 +3493,14 @@ int ek_print_vtk_particle_potential(char *filename) { return 1; } - auto *potential = - (float *)Utils::malloc(ek_parameters.number_of_nodes * sizeof(cufftReal)); + std::vector potential(ek_parameters.number_of_nodes); - cuda_safe_mem(cudaMemcpy2D(potential, ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.charge_potential_buffer, - ek_parameters.dim_x_padded * sizeof(cufftReal), - ek_parameters.dim_x * sizeof(cufftReal), - ek_parameters.dim_z * ek_parameters.dim_y, - cudaMemcpyDeviceToHost)); + cuda_safe_mem(cudaMemcpy2D( + potential.data(), ek_parameters.dim_x * sizeof(cufftReal), + ek_parameters.charge_potential_buffer, + ek_parameters.dim_x_padded * sizeof(cufftReal), + ek_parameters.dim_x * sizeof(cufftReal), + ek_parameters.dim_z * ek_parameters.dim_y, cudaMemcpyDeviceToHost)); fprintf(fp, "\ # vtk DataFile Version 2.0\n\ @@ -3644,11 
+3520,10 @@ LOOKUP_TABLE default\n", ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { fprintf(fp, "%e\n", potential[i]); } - free(potential); fclose(fp); return 0; @@ -3665,11 +3540,10 @@ int ek_print_vtk_lbforce_density(char *filename) { return 1; } - auto *lbforce_density = (lbForceFloat *)Utils::malloc( - ek_parameters.number_of_nodes * 3 * sizeof(lbForceFloat)); + std::vector lbforce_density(ek_parameters.number_of_nodes * 3); cuda_safe_mem( - cudaMemcpy(lbforce_density, node_f.force_density_buf, + cudaMemcpy(lbforce_density.data(), node_f.force_density_buf, ek_parameters.number_of_nodes * 3 * sizeof(lbForceFloat), cudaMemcpyDeviceToHost)); @@ -3691,19 +3565,14 @@ LOOKUP_TABLE default\n", ek_parameters.agrid * 0.5f, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.agrid, ek_parameters.number_of_nodes); - for (int i = 0; i < ek_parameters.number_of_nodes; i++) { - fprintf(fp, "%e %e %e\n", - lbforce_density[i] / (powf(ek_parameters.time_step, 2.0) * - powf(ek_parameters.agrid, 4.0)), - lbforce_density[i + ek_parameters.number_of_nodes] / - (powf(ek_parameters.time_step, 2.0) * - powf(ek_parameters.agrid, 4.0)), - lbforce_density[i + 2 * ek_parameters.number_of_nodes] / - (powf(ek_parameters.time_step, 2.0) * - powf(ek_parameters.agrid, 4.0))); + auto const norm = (Utils::int_pow<2>(ek_parameters.time_step) * + Utils::int_pow<4>(ek_parameters.agrid)); + for (unsigned i = 0; i < ek_parameters.number_of_nodes; i++) { + fprintf(fp, "%e %e %e\n", lbforce_density[i] / norm, + lbforce_density[i + ek_parameters.number_of_nodes] / norm, + lbforce_density[i + 2 * ek_parameters.number_of_nodes] / norm); } - free(lbforce_density); fclose(fp); return 0; @@ -4016,13 +3885,8 @@ ekfloat ek_get_particle_charge() { ekfloat ek_calculate_net_charge() { 
cuda_safe_mem(cudaMemset(charge_gpu, 0, sizeof(ekfloat))); - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((ek_parameters.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(ek_parameters.number_of_nodes, 4, threads_per_block); KERNELCALL(ek_calculate_system_charge, dim_grid, threads_per_block, charge_gpu); @@ -4049,7 +3913,7 @@ int ek_neutralize_system(int species) { ekfloat compensating_species_density = 0.0f; #ifndef EK_BOUNDARIES - for (int i = 0; i < ek_parameters.number_of_species; i++) + for (unsigned i = 0; i < ek_parameters.number_of_species; i++) compensating_species_density += ek_parameters.density[i] * ek_parameters.valency[i]; @@ -4072,7 +3936,7 @@ int ek_neutralize_system(int species) { ek_parameters.density[species_index] - (charge / ek_parameters.valency[species_index]) / (Utils::int_pow<3>(ek_parameters.agrid) * - ekfloat(ek_parameters.number_of_nodes - + ekfloat(static_cast(ek_parameters.number_of_nodes) - ek_parameters.number_of_boundary_nodes)); #endif // EK_BOUNDARIES @@ -4086,23 +3950,21 @@ int ek_neutralize_system(int species) { int ek_save_checkpoint(char *filename, char *lb_filename) { std::ofstream fout(filename, std::ofstream::binary); - auto *densities = - (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * sizeof(ekfloat)); + std::vector densities(ek_parameters.number_of_nodes); + auto const nchars = + static_cast(densities.size() * sizeof(ekfloat)); - for (int i = 0; i < ek_parameters.number_of_species; i++) { - cuda_safe_mem(cudaMemcpy(densities, ek_parameters.rho[i], - ek_parameters.number_of_nodes * sizeof(ekfloat), + for (unsigned i = 0; i < ek_parameters.number_of_species; i++) { + cuda_safe_mem(cudaMemcpy(densities.data(), ek_parameters.rho[i], + densities.size() * sizeof(ekfloat), cudaMemcpyDeviceToHost)); - 
if (!fout.write((char *)densities, - sizeof(ekfloat) * ek_parameters.number_of_nodes)) { - free(densities); + if (!fout.write(reinterpret_cast(densities.data()), nchars)) { fout.close(); return 1; } } - free(densities); fout.close(); lb_lbfluid_save_checkpoint(lb_filename, true); @@ -4113,23 +3975,21 @@ int ek_load_checkpoint(char *filename) { std::string fname(filename); std::ifstream fin((const char *)(fname + ".ek").c_str(), std::ifstream::binary); - auto *densities = - (ekfloat *)Utils::malloc(ek_parameters.number_of_nodes * sizeof(ekfloat)); + std::vector densities(ek_parameters.number_of_nodes); + auto const nchars = + static_cast(densities.size() * sizeof(ekfloat)); - for (int i = 0; i < ek_parameters.number_of_species; i++) { - if (!fin.read((char *)densities, - sizeof(ekfloat) * ek_parameters.number_of_nodes)) { - free(densities); + for (unsigned i = 0; i < ek_parameters.number_of_species; i++) { + if (!fin.read(reinterpret_cast(densities.data()), nchars)) { fin.close(); return 1; } - cuda_safe_mem(cudaMemcpy(ek_parameters.rho[i], densities, - ek_parameters.number_of_nodes * sizeof(ekfloat), + cuda_safe_mem(cudaMemcpy(ek_parameters.rho[i], densities.data(), + densities.size() * sizeof(ekfloat), cudaMemcpyHostToDevice)); } - free(densities); fin.close(); lb_lbfluid_load_checkpoint((char *)(fname + ".lb").c_str(), true); diff --git a/src/core/grid_based_algorithms/fd-electrostatics.cuh b/src/core/grid_based_algorithms/fd-electrostatics.cuh index 24232116441..567e37b5ab9 100644 --- a/src/core/grid_based_algorithms/fd-electrostatics.cuh +++ b/src/core/grid_based_algorithms/fd-electrostatics.cuh @@ -21,8 +21,6 @@ #include -#define PI_FLOAT 3.14159265358979323846f - class FdElectrostatics { public: struct InputParameters { @@ -67,12 +65,4 @@ private: bool initialized; }; -// extern __device__ __constant__ FdElectrostatics::Parameters -// fde_parameters_gpu; - -__device__ cufftReal fde_getNode(int x, int y, int z); -__device__ cufftReal fde_getNode(int i); 
-__device__ void fde_setNode(int x, int y, int z, cufftReal value); -__device__ void fde_setNode(int i, cufftReal value); - #endif diff --git a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu index 795be9eb98c..9d869142a21 100644 --- a/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu +++ b/src/core/grid_based_algorithms/fd-electrostatics_cuda.cu @@ -21,7 +21,9 @@ #include "grid_based_algorithms/fd-electrostatics.cuh" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" + +#include #include #include @@ -34,6 +36,13 @@ #error CU-file includes mpi.h! This should not happen! #endif +static constexpr unsigned int threads_per_block = 64; + +__device__ cufftReal fde_getNode(int x, int y, int z); +__device__ cufftReal fde_getNode(int i); +__device__ void fde_setNode(int x, int y, int z, cufftReal value); +__device__ void fde_setNode(int i, cufftReal value); + __global__ void createGreensfcn(); __global__ void multiplyGreensfcn(cufftComplex *charge_potential); @@ -102,13 +111,10 @@ FdElectrostatics::FdElectrostatics(InputParameters inputParameters, cuda_safe_mem( cudaMemcpyToSymbol(fde_parameters_gpu, ¶meters, sizeof(Parameters))); - int threads_per_block = 64; - int blocks_per_grid_y = 4; - int blocks_per_grid_x = - (parameters.dim_z * parameters.dim_y * (parameters.dim_x / 2 + 1) + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = calculate_dim_grid( + static_cast(parameters.dim_z * parameters.dim_y * + (parameters.dim_x / 2 + 1)), + 4, threads_per_block); KERNELCALL_stream(createGreensfcn, dim_grid, threads_per_block, stream); /* create 3D FFT plans */ @@ -151,14 +157,15 @@ __global__ void createGreensfcn() { // setting 0th Fourier mode to 0 enforces charge neutrality fde_parameters_gpu->greensfcn[index] = 0.0f; } else { + constexpr cufftReal two_pi = 2.0f * 
Utils::pi(); fde_parameters_gpu->greensfcn[index] = - -4.0f * PI_FLOAT * fde_parameters_gpu->prefactor * + -2.0f * two_pi * fde_parameters_gpu->prefactor * fde_parameters_gpu->agrid * fde_parameters_gpu->agrid * 0.5f / - (cos(2.0f * PI_FLOAT * static_cast(coord[0]) / + (cos(two_pi * static_cast(coord[0]) / static_cast(fde_parameters_gpu->dim_x)) + - cos(2.0f * PI_FLOAT * static_cast(coord[1]) / + cos(two_pi * static_cast(coord[1]) / static_cast(fde_parameters_gpu->dim_y)) + - cos(2.0f * PI_FLOAT * static_cast(coord[2]) / + cos(two_pi * static_cast(coord[2]) / static_cast(fde_parameters_gpu->dim_z)) - 3.0f) / static_cast(fde_parameters_gpu->dim_x * @@ -193,13 +200,10 @@ void FdElectrostatics::calculatePotential(cufftComplex *charge_potential) { fprintf(stderr, "ERROR: Unable to execute FFT plan\n"); } - int threads_per_block = 64; - int blocks_per_grid_y = 4; - int blocks_per_grid_x = - (parameters.dim_z * parameters.dim_y * (parameters.dim_x / 2 + 1) + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = calculate_dim_grid( + static_cast(parameters.dim_z * parameters.dim_y * + (parameters.dim_x / 2 + 1)), + 4, threads_per_block); KERNELCALL(multiplyGreensfcn, dim_grid, threads_per_block, charge_potential); diff --git a/src/core/grid_based_algorithms/halo.cpp b/src/core/grid_based_algorithms/halo.cpp index a7055e684da..b90be347376 100644 --- a/src/core/grid_based_algorithms/halo.cpp +++ b/src/core/grid_based_algorithms/halo.cpp @@ -33,91 +33,43 @@ #include "halo.hpp" #include -#include #include #include +#include -/** Primitive fieldtypes and their initializers */ -struct _Fieldtype fieldtype_double = {0, nullptr, nullptr, sizeof(double), 0, - 0, 0, false, nullptr}; - -void halo_create_field_vector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *const newtype) { - - Fieldtype ntype = *newtype = 
(Fieldtype)Utils::malloc(sizeof(*ntype)); - - ntype->subtype = oldtype; - ntype->vflag = true; - - ntype->vblocks = vblocks; - ntype->vstride = vstride; - ntype->vskip = vskip; - - ntype->extent = oldtype->extent * ((vblocks - 1) * vskip + vstride); - - int count = ntype->count = oldtype->count; - ntype->lengths = (int *)Utils::malloc(count * 2 * sizeof(int)); - ntype->disps = (int *)((char *)ntype->lengths + count * sizeof(int)); - - for (int i = 0; i < count; i++) { - ntype->disps[i] = oldtype->disps[i]; - ntype->lengths[i] = oldtype->lengths[i]; - } -} - -void halo_create_field_hvector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *const newtype) { - - Fieldtype ntype = *newtype = (Fieldtype)Utils::malloc(sizeof(*ntype)); - - ntype->subtype = oldtype; - ntype->vflag = false; - - ntype->vblocks = vblocks; - ntype->vstride = vstride; - ntype->vskip = vskip; - - ntype->extent = oldtype->extent * vstride + (vblocks - 1) * vskip; - - int const count = ntype->count = oldtype->count; - ntype->lengths = (int *)Utils::malloc(count * 2 * sizeof(int)); - ntype->disps = (int *)((char *)ntype->lengths + count * sizeof(int)); - - for (int i = 0; i < count; i++) { - ntype->disps[i] = oldtype->disps[i]; - ntype->lengths[i] = oldtype->lengths[i]; - } -} +/** Predefined fieldtype for double-precision LB */ +static std::shared_ptr fieldtype_double = + std::make_shared(static_cast(sizeof(double))); /** Set halo region to a given value * @param[out] dest pointer to the halo buffer * @param value integer value to write into the halo buffer * @param type halo field layout description */ -void halo_dtset(char *dest, int value, Fieldtype type) { +void halo_dtset(char *dest, int value, std::shared_ptr type) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; auto const vskip = type->vskip; - auto const count = type->count; - int const *const lens = type->lengths; - int const *const disps = type->disps; + auto const &lens = type->lengths; + auto 
const &disps = type->disps; auto const extent = type->extent; auto const block_size = static_cast(vskip) * static_cast(extent); for (int i = 0; i < vblocks; i++) { for (int j = 0; j < vstride; j++) { - for (int k = 0; k < count; k++) + for (std::size_t k = 0; k < disps.size(); k++) memset(dest + disps[k], value, lens[k]); } dest += block_size; } } -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype type); +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, + std::shared_ptr type); -void halo_copy_vector(char *r_buffer, char *s_buffer, int count, Fieldtype type, - bool vflag) { +void halo_copy_vector(char *r_buffer, char *s_buffer, int count, + std::shared_ptr type, bool vflag) { auto const vblocks = type->vblocks; auto const vstride = type->vstride; @@ -136,13 +88,14 @@ void halo_copy_vector(char *r_buffer, char *s_buffer, int count, Fieldtype type, } } -/** Copy lattice data with layout described by fieldtype. +/** Copy lattice data with layout described by @p type. 
* @param r_buffer data destination * @param s_buffer data source * @param count amount of data to copy * @param type field layout type */ -void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype type) { +void halo_dtcopy(char *r_buffer, char *s_buffer, int count, + std::shared_ptr type) { if (type->subtype) { halo_copy_vector(r_buffer, s_buffer, count, type, type->vflag); @@ -162,23 +115,22 @@ void halo_dtcopy(char *r_buffer, char *s_buffer, int count, Fieldtype type) { } } -void prepare_halo_communication(HaloCommunicator *const hc, - Lattice const *const lattice, - Fieldtype fieldtype, MPI_Datatype datatype, +void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice, + MPI_Datatype datatype, const Utils::Vector3i &local_node_grid) { - const auto grid = lattice->grid; - const auto period = lattice->halo_grid; + const auto &grid = lattice.grid; + const auto &period = lattice.halo_grid; - for (int n = 0; n < hc->num; n++) { - MPI_Type_free(&(hc->halo_info[n].datatype)); + for (int n = 0; n < hc.num; n++) { + MPI_Type_free(&(hc.halo_info[n].datatype)); } int const num = 2 * 3; /* two communications in each space direction */ - hc->num = num; - hc->halo_info.resize(num); + hc.num = num; + hc.halo_info.resize(num); - auto const extent = static_cast(fieldtype->extent); + auto const extent = static_cast(fieldtype_double->extent); auto const node_neighbors = calc_node_neighbors(comm_cart); @@ -186,7 +138,7 @@ void prepare_halo_communication(HaloCommunicator *const hc, for (int dir = 0; dir < 3; dir++) { for (int lr = 0; lr < 2; lr++) { - HaloInfo *hinfo = &(hc->halo_info[cnt]); + HaloInfo &hinfo = hc.halo_info[cnt]; int nblocks = 1; for (int k = dir + 1; k < 3; k++) { @@ -203,46 +155,46 @@ void prepare_halo_communication(HaloCommunicator *const hc, if (lr == 0) { /* send to left, recv from right */ - hinfo->s_offset = extent * static_cast(stride * 1); - hinfo->r_offset = extent * static_cast(stride * (grid[dir] + 1)); + hinfo.s_offset = 
extent * static_cast(stride * 1); + hinfo.r_offset = extent * static_cast(stride * (grid[dir] + 1)); } else { /* send to right, recv from left */ - hinfo->s_offset = extent * static_cast(stride * grid[dir]); - hinfo->r_offset = extent * static_cast(stride * 0); + hinfo.s_offset = extent * static_cast(stride * grid[dir]); + hinfo.r_offset = extent * static_cast(stride * 0); } - hinfo->source_node = node_neighbors[2 * dir + 1 - lr]; - hinfo->dest_node = node_neighbors[2 * dir + lr]; + hinfo.source_node = node_neighbors[2 * dir + 1 - lr]; + hinfo.dest_node = node_neighbors[2 * dir + lr]; - halo_create_field_vector(nblocks, stride, skip, fieldtype, - &hinfo->fieldtype); + hinfo.fieldtype = std::make_shared(nblocks, stride, skip, true, + fieldtype_double); - MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo->datatype); - MPI_Type_commit(&hinfo->datatype); + MPI_Type_vector(nblocks, stride, skip, datatype, &hinfo.datatype); + MPI_Type_commit(&hinfo.datatype); if (!box_geo.periodic(dir) && (local_geo.boundary()[2 * dir + lr] != 0 || local_geo.boundary()[2 * dir + 1 - lr] != 0)) { if (local_node_grid[dir] == 1) { - hinfo->type = HALO_OPEN; + hinfo.type = HALO_OPEN; } else if (lr == 0) { if (local_geo.boundary()[2 * dir + lr] == 1) { - hinfo->type = HALO_RECV; + hinfo.type = HALO_RECV; } else { - hinfo->type = HALO_SEND; + hinfo.type = HALO_SEND; } } else { if (local_geo.boundary()[2 * dir + lr] == -1) { - hinfo->type = HALO_RECV; + hinfo.type = HALO_RECV; } else { - hinfo->type = HALO_SEND; + hinfo.type = HALO_SEND; } } } else { if (local_node_grid[dir] == 1) { - hc->halo_info[cnt].type = HALO_LOCL; + hc.halo_info[cnt].type = HALO_LOCL; } else { - hc->halo_info[cnt].type = HALO_SENDRECV; + hc.halo_info[cnt].type = HALO_SENDRECV; } } cnt++; @@ -250,44 +202,44 @@ void prepare_halo_communication(HaloCommunicator *const hc, } } -void release_halo_communication(HaloCommunicator *const hc) { - for (int n = 0; n < hc->num; n++) { - 
MPI_Type_free(&(hc->halo_info[n].datatype)); +void release_halo_communication(HaloCommunicator &hc) { + for (int n = 0; n < hc.num; n++) { + MPI_Type_free(&(hc.halo_info[n].datatype)); } } -void halo_communication(HaloCommunicator const *const hc, char *const base) { +void halo_communication(const HaloCommunicator &hc, char *const base) { - Fieldtype fieldtype; + std::shared_ptr fieldtype; MPI_Datatype datatype; MPI_Request request; MPI_Status status; - for (int n = 0; n < hc->num; n++) { + for (int n = 0; n < hc.num; n++) { int s_node, r_node; - int comm_type = hc->halo_info[n].type; - char *s_buffer = (char *)base + hc->halo_info[n].s_offset; - char *r_buffer = (char *)base + hc->halo_info[n].r_offset; + int comm_type = hc.halo_info[n].type; + char *s_buffer = (char *)base + hc.halo_info[n].s_offset; + char *r_buffer = (char *)base + hc.halo_info[n].r_offset; switch (comm_type) { case HALO_LOCL: - fieldtype = hc->halo_info[n].fieldtype; + fieldtype = hc.halo_info[n].fieldtype; halo_dtcopy(r_buffer, s_buffer, 1, fieldtype); break; case HALO_SENDRECV: - datatype = hc->halo_info[n].datatype; - s_node = hc->halo_info[n].source_node; - r_node = hc->halo_info[n].dest_node; + datatype = hc.halo_info[n].datatype; + s_node = hc.halo_info[n].source_node; + r_node = hc.halo_info[n].dest_node; MPI_Sendrecv(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, comm_cart, &status); break; case HALO_SEND: - datatype = hc->halo_info[n].datatype; - fieldtype = hc->halo_info[n].fieldtype; - r_node = hc->halo_info[n].dest_node; + datatype = hc.halo_info[n].datatype; + fieldtype = hc.halo_info[n].fieldtype; + r_node = hc.halo_info[n].dest_node; MPI_Isend(s_buffer, 1, datatype, r_node, REQ_HALO_SPREAD, comm_cart, &request); halo_dtset(r_buffer, 0, fieldtype); @@ -295,15 +247,15 @@ void halo_communication(HaloCommunicator const *const hc, char *const base) { break; case HALO_RECV: - datatype = hc->halo_info[n].datatype; - s_node = 
hc->halo_info[n].source_node; + datatype = hc.halo_info[n].datatype; + s_node = hc.halo_info[n].source_node; MPI_Irecv(r_buffer, 1, datatype, s_node, REQ_HALO_SPREAD, comm_cart, &request); MPI_Wait(&request, &status); break; case HALO_OPEN: - fieldtype = hc->halo_info[n].fieldtype; + fieldtype = hc.halo_info[n].fieldtype; /** \todo this does not work for the n_i - \ */ halo_dtset(r_buffer, 0, fieldtype); break; diff --git a/src/core/grid_based_algorithms/halo.hpp b/src/core/grid_based_algorithms/halo.hpp index 193ee2c6ea4..4e0b8d39a96 100644 --- a/src/core/grid_based_algorithms/halo.hpp +++ b/src/core/grid_based_algorithms/halo.hpp @@ -34,6 +34,7 @@ #include +#include #include /** \name Types of halo communications */ @@ -55,26 +56,33 @@ /** Layout of the lattice data. * The description is similar to MPI datatypes but a bit more compact. - * See \ref halo_create_field_vector and \ref - * halo_dtcopy to understand how it works. */ -typedef struct _Fieldtype *Fieldtype; -struct _Fieldtype { - int count; /**< number of subtypes in fieldtype */ - int *disps; /**< displacements of the subtypes */ - int *lengths; /**< lengths of the subtypes */ - int extent; /**< extent of the complete fieldtype including gaps */ - int vblocks; /**< number of blocks in field vectors */ - int vstride; /**< size of strides in field vectors */ - int vskip; /**< displacement between strides in field vectors */ +struct FieldType { + FieldType(int new_extent) + : count(0), disps({}), lengths({}), extent(new_extent), vblocks(0), + vstride(0), vskip(0), vflag(false), subtype(nullptr) {} + FieldType(int new_vblocks, int new_vstride, int new_vskip, bool new_vflag, + std::shared_ptr oldtype) + : count(oldtype->count), disps(oldtype->disps), lengths(oldtype->lengths), + extent(0), vblocks(new_vblocks), vstride(new_vstride), vskip(new_vskip), + vflag(new_vflag), subtype(oldtype) { + if (vflag) { + extent = oldtype->extent * ((vblocks - 1) * vskip + vstride); + } else { + extent = oldtype->extent * 
vstride + (vblocks - 1) * vskip; + } + } + int count; /**< number of subtypes in fieldtype */ + std::vector disps; /**< displacements of the subtypes */ + std::vector lengths; /**< lengths of the subtypes */ + int extent; /**< extent of the complete fieldtype including gaps */ + int vblocks; /**< number of blocks in field vectors */ + int vstride; /**< size of strides in field vectors */ + int vskip; /**< displacement between strides in field vectors */ bool vflag; - Fieldtype subtype; + std::shared_ptr subtype; }; -/** Predefined fieldtypes */ -extern struct _Fieldtype fieldtype_double; -#define FIELDTYPE_DOUBLE (&fieldtype_double) - /** Structure describing a Halo region */ typedef struct { @@ -86,7 +94,8 @@ typedef struct { unsigned long s_offset; /**< offset for send buffer */ unsigned long r_offset; /**< offset for receive buffer */ - Fieldtype fieldtype; /**< type layout of the data being exchanged */ + std::shared_ptr + fieldtype; /**< type layout of the data being exchanged */ MPI_Datatype datatype; /**< MPI datatype of data being communicated */ } HaloInfo; @@ -102,40 +111,27 @@ class HaloCommunicator { std::vector halo_info; /**< set of halo communications */ }; -/** Creates a field vector layout - * @param vblocks number of vector blocks - * @param vstride size of strides in field vector - * @param vskip displacements of strides in field vector - * @param oldtype fieldtype the vector is composed of - * @param[out] newtype newly created fieldtype - */ -void halo_create_field_vector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *newtype); -void halo_create_field_hvector(int vblocks, int vstride, int vskip, - Fieldtype oldtype, Fieldtype *newtype); - /** Preparation of the halo parallelization scheme. 
Sets up the * necessary data structures for \ref halo_communication * @param[in,out] hc halo communicator being created * @param[in] lattice lattice the communication is created for - * @param fieldtype field layout of the lattice data * @param datatype MPI datatype for the lattice data * @param local_node_grid Number of nodes in each spatial dimension */ -void prepare_halo_communication(HaloCommunicator *hc, Lattice const *lattice, - Fieldtype fieldtype, MPI_Datatype datatype, +void prepare_halo_communication(HaloCommunicator &hc, const Lattice &lattice, + MPI_Datatype datatype, const Utils::Vector3i &local_node_grid); /** Frees data structures associated with a halo communicator * @param[in,out] hc halo communicator to be released */ -void release_halo_communication(HaloCommunicator *hc); +void release_halo_communication(HaloCommunicator &hc); /** Perform communication according to the parallelization scheme * described by the halo communicator * @param[in] hc halo communicator describing the parallelization scheme * @param[in] base base plane of local node */ -void halo_communication(HaloCommunicator const *hc, char *base); +void halo_communication(const HaloCommunicator &hc, char *base); #endif /* HALO_H */ diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp index 1a1b3acd717..6dc4edf4040 100644 --- a/src/core/grid_based_algorithms/lb.cpp +++ b/src/core/grid_based_algorithms/lb.cpp @@ -45,7 +45,6 @@ #include #include #include -#include #include #include @@ -64,6 +63,7 @@ #include #include #include +#include #include using Utils::get_linear_index; @@ -176,11 +176,11 @@ using LB_FluidData = boost::multi_array; static LB_FluidData lbfluid_a; static LB_FluidData lbfluid_b; -/** Pointer to the velocity populations of the fluid. - * lbfluid contains pre-collision populations, lbfluid_post - * contains post-collision. +/** Span of the velocity populations of the fluid (pre-collision populations). 
*/ LB_Fluid lbfluid; +/** Span of the velocity populations of the fluid (post-collision populations). + */ LB_Fluid lbfluid_post; std::vector lbfields; @@ -653,21 +653,20 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, * datatypes */ /* prepare the communication for a single velocity */ - prepare_halo_communication(&comm, &lb_lattice, FIELDTYPE_DOUBLE, MPI_DOUBLE, - node_grid); + prepare_halo_communication(comm, lb_lattice, MPI_DOUBLE, node_grid); halo_comm.num = comm.num; halo_comm.halo_info.resize(comm.num); /* replicate the halo structure */ for (int i = 0; i < comm.num; i++) { - HaloInfo *hinfo = &(halo_comm.halo_info[i]); + HaloInfo &hinfo = halo_comm.halo_info[i]; - hinfo->source_node = comm.halo_info[i].source_node; - hinfo->dest_node = comm.halo_info[i].dest_node; - hinfo->s_offset = comm.halo_info[i].s_offset; - hinfo->r_offset = comm.halo_info[i].r_offset; - hinfo->type = comm.halo_info[i].type; + hinfo.source_node = comm.halo_info[i].source_node; + hinfo.dest_node = comm.halo_info[i].dest_node; + hinfo.s_offset = comm.halo_info[i].s_offset; + hinfo.r_offset = comm.halo_info[i].r_offset; + hinfo.type = comm.halo_info[i].type; /* generate the vector datatype for the structure of lattices we * have to use hvector here because the extent of the subtypes @@ -679,16 +678,16 @@ void lb_prepare_communication(HaloCommunicator &halo_comm, MPI_Type_get_extent(MPI_DOUBLE, &lower, &extent); MPI_Type_create_hvector(D3Q19::n_vel, 1, lb_lattice.halo_grid_volume * extent, - comm.halo_info[i].datatype, &hinfo->datatype); - MPI_Type_commit(&hinfo->datatype); + comm.halo_info[i].datatype, &hinfo.datatype); + MPI_Type_commit(&hinfo.datatype); - halo_create_field_hvector( + hinfo.fieldtype = std::make_shared( D3Q19::n_vel, 1, - static_cast(lb_lattice.halo_grid_volume * sizeof(double)), - comm.halo_info[i].fieldtype, &hinfo->fieldtype); + static_cast(lb_lattice.halo_grid_volume * sizeof(double)), false, + comm.halo_info[i].fieldtype); } - 
release_halo_communication(&comm); + release_halo_communication(comm); } /***********************************************************************/ @@ -743,7 +742,6 @@ void lb_set_population_from_density_momentum_density_stress( } /**@}*/ -/** Calculation of hydrodynamic modes */ std::array lb_calc_modes(Lattice::index_t index, const LB_Fluid &lb_fluid) { return Utils::matrix_vector_product( @@ -895,10 +893,10 @@ auto lb_next_offsets(const Lattice &lb_lattice, } template -void lb_stream(LB_Fluid &lbfluid, const std::array &populations, +void lb_stream(LB_Fluid &lb_fluid, const std::array &populations, size_t index, std::array const &offsets) { for (int i = 0; i < populations.size(); i++) { - lbfluid[i][index + offsets[i]] = populations[i]; + lb_fluid[i][index + offsets[i]] = populations[i]; } } @@ -971,7 +969,7 @@ void lb_collide_stream() { /* swap the pointers for old and new population fields */ std::swap(lbfluid, lbfluid_post); - halo_communication(&update_halo_comm, + halo_communication(update_halo_comm, reinterpret_cast(lbfluid[0].data())); #ifdef ADDITIONAL_CHECKS @@ -1000,31 +998,25 @@ void lattice_boltzmann_update() { /** \name Coupling part */ /***********************************************************************/ /**@{*/ - -static int compare_buffers(double *buf1, double *buf2, int size) { - int ret; - if (memcmp(buf1, buf2, size) != 0) { +#ifdef ADDITIONAL_CHECKS +template int compare_buffers(T const &buff_a, T const &buff_b) { + if (buff_a != buff_b) { runtimeErrorMsg() << "Halo buffers are not identical"; - ret = 1; - } else { - ret = 0; + return ES_ERROR; } - return ret; + return ES_OK; } -#ifdef ADDITIONAL_CHECKS /** Check consistency of the halo regions. * Test whether the halo regions have been exchanged correctly. 
*/ void lb_check_halo_regions(const LB_Fluid &lb_fluid, const Lattice &lb_lattice) { Lattice::index_t index; - int i, x, y, z, s_node, r_node, count = D3Q19::n_vel; - double *s_buffer, *r_buffer; - MPI_Status status[2]; - - r_buffer = (double *)Utils::malloc(count * sizeof(double)); - s_buffer = (double *)Utils::malloc(count * sizeof(double)); + std::size_t i; + int x, y, z, s_node, r_node; + std::array s_buffer; + std::array r_buffer; auto const node_neighbors = calc_node_neighbors(comm_cart); @@ -1038,22 +1030,19 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[1]; r_node = node_neighbors[0]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(lb_lattice.grid[0], y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=" << 0 << " at index=" << index << " y=" << y << " z=" << z << "\n"; } @@ -1067,20 +1056,17 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[0]; r_node = node_neighbors[1]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(1, y, z, lb_lattice.halo_grid); for (i = 0; i < 
D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(1, y, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=0 at index=" << index << " y=" << y << " z=" << z << "\n"; } @@ -1099,22 +1085,19 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[3]; r_node = node_neighbors[2]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, lb_lattice.grid[1], z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=1 at index=" << index << " x=" << x << " z=" << z << "\n"; } @@ -1129,20 +1112,17 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[2]; r_node = node_neighbors[3]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, 1, z, 
lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, 1, z, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=1 at index=" << index << " x=" << x << " z=" << z << "\n"; } @@ -1161,22 +1141,19 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[5]; r_node = node_neighbors[4]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, y, lb_lattice.grid[2], lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=2 at index=" << index << " x=" << x << " y=" << y << " z=" << lb_lattice.grid[2] << "\n"; @@ -1194,20 +1171,17 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, s_node = node_neighbors[4]; r_node = node_neighbors[5]; if (n_nodes > 1) { - MPI_Sendrecv(s_buffer, count, MPI_DOUBLE, r_node, REQ_HALO_CHECK, - r_buffer, count, MPI_DOUBLE, s_node, REQ_HALO_CHECK, - comm_cart, status); + comm_cart.sendrecv(r_node, REQ_HALO_CHECK, s_buffer, s_node, + 
REQ_HALO_CHECK, r_buffer); index = get_linear_index(x, y, 1, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) s_buffer[i] = lb_fluid[i][index]; - compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double))); + compare_buffers(s_buffer, r_buffer); } else { index = get_linear_index(x, y, 1, lb_lattice.halo_grid); for (i = 0; i < D3Q19::n_vel; i++) r_buffer[i] = lb_fluid[i][index]; - if (compare_buffers(s_buffer, r_buffer, - count * static_cast(sizeof(double)))) { + if (compare_buffers(s_buffer, r_buffer)) { std::cerr << "buffers differ in dir=2 at index=" << index << " x=" << x << " y=" << y << "\n"; } @@ -1215,9 +1189,6 @@ void lb_check_halo_regions(const LB_Fluid &lb_fluid, } } } - - free(r_buffer); - free(s_buffer); } #endif // ADDITIONAL_CHECKS @@ -1342,7 +1313,8 @@ void lb_bounce_back(LB_Fluid &lb_fluid, const LB_Parameters &lb_parameters, /** Calculate the local fluid momentum. * The calculation is implemented explicitly for the special case of D3Q19. - * @param[in] index Local lattice site + * @param[in] index Local lattice site + * @param[in] lb_fluid Populations of the fluid * @retval The local fluid momentum. */ Utils::Vector3d lb_calc_local_momentum_density(Lattice::index_t index, @@ -1361,9 +1333,11 @@ Utils::Vector3d lb_calc_local_momentum_density(Lattice::index_t index, lb_fluid[18][index]}}; } -// Statistics in MD units. /** Calculate momentum of the LB fluid. 
- * \param result Fluid momentum + * @param[out] result Fluid momentum in MD units + * @param[in] lb_parameters LB parameters + * @param[in] lb_fields Hydrodynamic fields of the fluid + * @param[in] lb_lattice The underlying lattice */ void lb_calc_fluid_momentum(double *result, const LB_Parameters &lb_parameters, const std::vector &lb_fields, diff --git a/src/core/grid_based_algorithms/lb.hpp b/src/core/grid_based_algorithms/lb.hpp index f1dcb52894b..5089ec99499 100644 --- a/src/core/grid_based_algorithms/lb.hpp +++ b/src/core/grid_based_algorithms/lb.hpp @@ -144,10 +144,7 @@ void lb_reinit_fluid(std::vector &lb_fields, const LB_Parameters &lb_parameters); void lb_reinit_parameters(LB_Parameters &lb_parameters); -/** Pointer to the velocity populations of the fluid. - * lbfluid contains pre-collision populations, lbfluid_post - * contains post-collision populations - */ + using LB_Fluid = std::array, 19>; extern LB_Fluid lbfluid; @@ -172,7 +169,7 @@ template auto get(const LB_Fluid_Ref &lb_fluid) { } // namespace Utils -/** Pointer to the hydrodynamic fields of the fluid */ +/** Hydrodynamic fields of the fluid */ extern std::vector lbfields; /************************************************************/ @@ -213,7 +210,8 @@ Utils::Vector6d lb_calc_pressure_tensor(std::array const &modes, /** Calculation of hydrodynamic modes. * - * @param index number of the node to calculate the modes for + * @param[in] index Number of the node to calculate the modes for + * @param[in] lb_fluid Populations of the fluid * @retval Array containing the modes. 
*/ std::array lb_calc_modes(Lattice::index_t index, diff --git a/src/core/grid_based_algorithms/lb_boundaries.cpp b/src/core/grid_based_algorithms/lb_boundaries.cpp index 1df5ca9e854..3f7338ef2b3 100644 --- a/src/core/grid_based_algorithms/lb_boundaries.cpp +++ b/src/core/grid_based_algorithms/lb_boundaries.cpp @@ -161,7 +161,7 @@ void lb_init_boundaries() { return; } ek_init_boundaries(); - int number_of_boundnodes = 0; + unsigned number_of_boundnodes = 0; std::vector host_boundary_node_list; std::vector host_boundary_index_list; size_t size_of_index; @@ -277,7 +277,7 @@ Utils::Vector3d lbboundary_get_force(LBBoundary const *lbb) { std::vector forces(3 * lbboundaries.size()); if (lattice_switch == ActiveLB::GPU) { #if defined(LB_BOUNDARIES_GPU) && defined(CUDA) - lb_gpu_get_boundary_forces(forces.data()); + lb_gpu_get_boundary_forces(forces); #endif } else if (lattice_switch == ActiveLB::CPU) { #if defined(LB_BOUNDARIES) diff --git a/src/core/grid_based_algorithms/lb_collective_interface.cpp b/src/core/grid_based_algorithms/lb_collective_interface.cpp index f800bec579a..a2e1cb8e13c 100644 --- a/src/core/grid_based_algorithms/lb_collective_interface.cpp +++ b/src/core/grid_based_algorithms/lb_collective_interface.cpp @@ -81,11 +81,20 @@ mpi_lb_get_interpolated_velocity(Utils::Vector3d const &pos) { REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_interpolated_velocity) +boost::optional +mpi_lb_get_interpolated_density(Utils::Vector3d const &pos) { + return detail::lb_calc_for_pos(pos, [&](auto pos) { + return lb_lbinterpolation_get_interpolated_density(pos); + }); +} + +REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_interpolated_density) + auto mpi_lb_get_density(Utils::Vector3i const &index) { - return detail::lb_calc_fluid_kernel(index, - [&](auto modes, auto force_density) { - return lb_calc_density(modes, lbpar); - }); + return detail::lb_calc_fluid_kernel( + index, [&](auto const &modes, auto const &force_density) { + return lb_calc_density(modes, lbpar); + }); } 
REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_density) @@ -138,7 +147,7 @@ REGISTER_CALLBACK(mpi_lb_set_force_density) auto mpi_lb_get_momentum_density(Utils::Vector3i const &index) { return detail::lb_calc_fluid_kernel( - index, [&](auto modes, auto force_density) { + index, [&](auto const &modes, auto const &force_density) { return lb_calc_momentum_density(modes, force_density); }); } @@ -147,7 +156,7 @@ REGISTER_CALLBACK_ONE_RANK(mpi_lb_get_momentum_density) auto mpi_lb_get_pressure_tensor(Utils::Vector3i const &index) { return detail::lb_calc_fluid_kernel( - index, [&](auto modes, auto force_density) { + index, [&](auto const &modes, auto const &force_density) { return lb_calc_pressure_tensor(modes, force_density, lbpar); }); } diff --git a/src/core/grid_based_algorithms/lb_collective_interface.hpp b/src/core/grid_based_algorithms/lb_collective_interface.hpp index 1c9afa8fb31..4b6b0272ab4 100644 --- a/src/core/grid_based_algorithms/lb_collective_interface.hpp +++ b/src/core/grid_based_algorithms/lb_collective_interface.hpp @@ -27,6 +27,8 @@ /* collective getter functions */ boost::optional mpi_lb_get_interpolated_velocity(Utils::Vector3d const &pos); +boost::optional +mpi_lb_get_interpolated_density(Utils::Vector3d const &pos); boost::optional mpi_lb_get_density(Utils::Vector3i const &index); boost::optional mpi_lb_get_populations(Utils::Vector3i const &index); diff --git a/src/core/grid_based_algorithms/lb_interface.cpp b/src/core/grid_based_algorithms/lb_interface.cpp index a48da0bda33..3fabd382a99 100644 --- a/src/core/grid_based_algorithms/lb_interface.cpp +++ b/src/core/grid_based_algorithms/lb_interface.cpp @@ -125,7 +125,7 @@ void lb_lbfluid_sanity_checks() { void lb_lbfluid_on_integration_start() { lb_lbfluid_sanity_checks(); if (lattice_switch == ActiveLB::CPU) { - halo_communication(&update_halo_comm, + halo_communication(update_halo_comm, reinterpret_cast(lbfluid[0].data())); } } @@ -1000,8 +1000,7 @@ bool lb_lbnode_is_index_valid(Utils::Vector3i const &ind) 
{ double lb_lbnode_get_density(const Utils::Vector3i &ind) { if (lattice_switch == ActiveLB::GPU) { #ifdef CUDA - auto const single_nodeindex = ind[0] + ind[1] * lbpar_gpu.dim_x + - ind[2] * lbpar_gpu.dim_x * lbpar_gpu.dim_y; + auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); static LB_rho_v_pi_gpu host_print_values; lb_print_node_GPU(single_nodeindex, &host_print_values); return host_print_values.rho; @@ -1020,8 +1019,7 @@ const Utils::Vector3d lb_lbnode_get_velocity(const Utils::Vector3i &ind) { if (lattice_switch == ActiveLB::GPU) { #ifdef CUDA static LB_rho_v_pi_gpu host_print_values; - auto const single_nodeindex = ind[0] + ind[1] * lbpar_gpu.dim_x + - ind[2] * lbpar_gpu.dim_x * lbpar_gpu.dim_y; + auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); lb_print_node_GPU(single_nodeindex, &host_print_values); return {static_cast(host_print_values.v[0]), static_cast(host_print_values.v[1]), @@ -1057,8 +1055,7 @@ lb_lbnode_get_pressure_tensor_neq(const Utils::Vector3i &ind) { #ifdef CUDA Utils::Vector6d tensor{}; static LB_rho_v_pi_gpu host_print_values; - auto const single_nodeindex = ind[0] + ind[1] * lbpar_gpu.dim_x + - ind[2] * lbpar_gpu.dim_x * lbpar_gpu.dim_y; + auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); lb_print_node_GPU(single_nodeindex, &host_print_values); for (int i = 0; i < 6; i++) { tensor[i] = static_cast(host_print_values.pi[i]); @@ -1123,8 +1120,7 @@ int lb_lbnode_get_boundary(const Utils::Vector3i &ind) { if (lattice_switch == ActiveLB::GPU) { #ifdef CUDA unsigned int host_flag; - auto const single_nodeindex = ind[0] + ind[1] * lbpar_gpu.dim_x + - ind[2] * lbpar_gpu.dim_x * lbpar_gpu.dim_y; + auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); lb_get_boundary_flag_GPU(single_nodeindex, &host_flag); return static_cast(host_flag); #else @@ -1162,8 +1158,7 @@ const Utils::Vector19d lb_lbnode_get_pop(const Utils::Vector3i &ind) { void lb_lbnode_set_density(const Utils::Vector3i 
&ind, double p_density) { if (lattice_switch == ActiveLB::GPU) { #ifdef CUDA - auto const single_nodeindex = ind[0] + ind[1] * lbpar_gpu.dim_x + - ind[2] * lbpar_gpu.dim_x * lbpar_gpu.dim_y; + auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); auto const host_density = static_cast(p_density); lb_set_node_rho_GPU(single_nodeindex, host_density); #endif // CUDA @@ -1188,8 +1183,7 @@ void lb_lbnode_set_velocity(const Utils::Vector3i &ind, host_velocity[0] = static_cast(u[0]); host_velocity[1] = static_cast(u[1]); host_velocity[2] = static_cast(u[2]); - auto const single_nodeindex = ind[0] + ind[1] * lbpar_gpu.dim_x + - ind[2] * lbpar_gpu.dim_x * lbpar_gpu.dim_y; + auto const single_nodeindex = calculate_node_index(lbpar_gpu, ind); lb_set_node_velocity_GPU(single_nodeindex, host_velocity); #endif // CUDA } else if (lattice_switch == ActiveLB::CPU) { @@ -1279,3 +1273,23 @@ lb_lbfluid_get_interpolated_velocity(const Utils::Vector3d &pos) { } throw NoLBActive(); } + +double lb_lbfluid_get_interpolated_density(const Utils::Vector3d &pos) { + auto const folded_pos = folded_position(pos, box_geo); + auto const interpolation_order = lb_lbinterpolation_get_interpolation_order(); + if (lattice_switch == ActiveLB::GPU) { + throw std::runtime_error( + "Density interpolation is not implemented for the GPU LB."); + } + if (lattice_switch == ActiveLB::CPU) { + switch (interpolation_order) { + case (InterpolationOrder::quadratic): + throw std::runtime_error("The non-linear interpolation scheme is not " + "implemented for the CPU LB."); + case (InterpolationOrder::linear): + return mpi_call(::Communication::Result::one_rank, + mpi_lb_get_interpolated_density, folded_pos); + } + } + throw NoLBActive(); +} diff --git a/src/core/grid_based_algorithms/lb_interface.hpp b/src/core/grid_based_algorithms/lb_interface.hpp index c72ef9b7f8b..9b831197a68 100644 --- a/src/core/grid_based_algorithms/lb_interface.hpp +++ b/src/core/grid_based_algorithms/lb_interface.hpp @@ -265,4 
+265,11 @@ Utils::Vector3d lb_lbfluid_calc_fluid_momentum(); const Utils::Vector3d lb_lbfluid_get_interpolated_velocity(const Utils::Vector3d &pos); +/** + * @brief Calculates the interpolated fluid density on the master process. + * @param pos Position at which the density is to be calculated. + * @retval interpolated fluid density. + */ +double lb_lbfluid_get_interpolated_density(const Utils::Vector3d &pos); + #endif diff --git a/src/core/grid_based_algorithms/lb_interpolation.cpp b/src/core/grid_based_algorithms/lb_interpolation.cpp index ea0cd46e1dd..d0db33f33ae 100644 --- a/src/core/grid_based_algorithms/lb_interpolation.cpp +++ b/src/core/grid_based_algorithms/lb_interpolation.cpp @@ -82,6 +82,16 @@ Utils::Vector3d node_u(Lattice::index_t index) { return Utils::Vector3d{modes[1], modes[2], modes[3]} / local_density; } +double node_dens(Lattice::index_t index) { +#ifdef LB_BOUNDARIES + if (lbfields[index].boundary) { + return lbpar.density; + } +#endif // LB_BOUNDARIES + auto const modes = lb_calc_modes(index, lbfluid); + return lbpar.density + modes[0]; +} + } // namespace const Utils::Vector3d @@ -98,6 +108,19 @@ lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &pos) { return interpolated_u; } +double lb_lbinterpolation_get_interpolated_density(const Utils::Vector3d &pos) { + double interpolated_dens = 0.; + + /* Calculate fluid density at the position. + This is done by linear interpolation (eq. 
(11) @cite ahlrichs99a) */ + lattice_interpolation(lblattice, pos, + [&interpolated_dens](Lattice::index_t index, double w) { + interpolated_dens += w * node_dens(index); + }); + + return interpolated_dens; +} + void lb_lbinterpolation_add_force_density( const Utils::Vector3d &pos, const Utils::Vector3d &force_density) { switch (interpolation_order) { diff --git a/src/core/grid_based_algorithms/lb_interpolation.hpp b/src/core/grid_based_algorithms/lb_interpolation.hpp index 68d9c6e1be6..28544afa9ef 100644 --- a/src/core/grid_based_algorithms/lb_interpolation.hpp +++ b/src/core/grid_based_algorithms/lb_interpolation.hpp @@ -41,6 +41,13 @@ InterpolationOrder lb_lbinterpolation_get_interpolation_order(); const Utils::Vector3d lb_lbinterpolation_get_interpolated_velocity(const Utils::Vector3d &p); +/** + * @brief Calculates the fluid density at a given position of the + * lattice. + * @note It can lead to undefined behaviour if the + * position is not within the local lattice. */ +double lb_lbinterpolation_get_interpolated_density(const Utils::Vector3d &p); + /** * @brief Add a force density to the fluid at the given position. */ diff --git a/src/core/grid_based_algorithms/lbgpu.cpp b/src/core/grid_based_algorithms/lbgpu.cpp index 7fed91b23df..aa30a96c9ed 100644 --- a/src/core/grid_based_algorithms/lbgpu.cpp +++ b/src/core/grid_based_algorithms/lbgpu.cpp @@ -207,7 +207,7 @@ void lb_init_gpu() { /* set parameters for transfer to gpu */ lb_reinit_parameters_gpu(); - lb_init_GPU(&lbpar_gpu); + lb_init_GPU(lbpar_gpu); gpu_init_particle_comm(); cuda_bcast_global_part_params(); diff --git a/src/core/grid_based_algorithms/lbgpu.cuh b/src/core/grid_based_algorithms/lbgpu.cuh index daa989e27ba..7bb8f94bcca 100644 --- a/src/core/grid_based_algorithms/lbgpu.cuh +++ b/src/core/grid_based_algorithms/lbgpu.cuh @@ -32,11 +32,10 @@ #include -#ifdef CUDA /** Velocity densities for the lattice Boltzmann system. 
*/ struct LB_nodes_gpu { /** velocity density of the node */ - float *vd = nullptr; + float *populations = nullptr; unsigned int *boundary = nullptr; Utils::Array *boundary_velocity = nullptr; }; @@ -73,7 +72,6 @@ inline __device__ float4 random_wrapper_philox(unsigned int index, (CURAND_2POW32_INV / 2.0f); return rnd_floats; } -#endif // CUDA #endif // CUDA #endif diff --git a/src/core/grid_based_algorithms/lbgpu.hpp b/src/core/grid_based_algorithms/lbgpu.hpp index 857fd23dc4b..90d55f72af4 100644 --- a/src/core/grid_based_algorithms/lbgpu.hpp +++ b/src/core/grid_based_algorithms/lbgpu.hpp @@ -31,6 +31,7 @@ #include "OptionalCounter.hpp" #include +#include #include #include @@ -48,7 +49,6 @@ typedef double lbForceFloat; typedef float lbForceFloat; #endif -/**-------------------------------------------------------------------------*/ /** Parameters for the lattice Boltzmann system for GPU. */ struct LB_parameters_gpu { /** number density (LB units) */ @@ -110,40 +110,30 @@ struct LB_rho_v_gpu { float rho; /** velocity of the node */ - float v[3]; + Utils::Array v; }; /* this structure is almost duplicated for memory efficiency. When the stress tensor element are needed at every timestep, this features should be explicitly switched on */ -typedef struct { +struct LB_rho_v_pi_gpu { /** density of the node */ float rho; /** velocity of the node */ - float v[3]; + Utils::Array v; /** pressure tensor */ - float pi[6]; -} LB_rho_v_pi_gpu; - -typedef struct { + Utils::Array pi; +}; +struct LB_node_force_density_gpu { lbForceFloat *force_density; #if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG) // We need the node forces for the velocity interpolation at the virtual - // particles' position However, LBM wants to reset them immediately after the - // LBM update This variable keeps a backup + // particles' position. However, LBM wants to reset them immediately + // after the LBM update. 
This variable keeps a backup lbForceFloat *force_density_buf; #endif - -} LB_node_force_density_gpu; - -typedef struct { - - float force_density[3]; - - unsigned int index; - -} LB_extern_nodeforcedensity_gpu; +}; /************************************************************/ /** \name Exported Variables */ @@ -157,6 +147,8 @@ extern std::vector host_values; extern LB_node_force_density_gpu node_f; extern bool ek_initialized; #endif +extern OptionalCounter rng_counter_fluid_gpu; +extern OptionalCounter rng_counter_coupling_gpu; /**@}*/ @@ -167,9 +159,9 @@ extern bool ek_initialized; void lb_GPU_sanity_checks(); -void lb_get_device_values_pointer(LB_rho_v_gpu **pointeradress); -void lb_get_boundary_force_pointer(float **pointeradress); -void lb_get_para_pointer(LB_parameters_gpu **pointeradress); +void lb_get_device_values_pointer(LB_rho_v_gpu **pointer_address); +void lb_get_boundary_force_pointer(float **pointer_address); +void lb_get_para_pointer(LB_parameters_gpu **pointer_address); void lattice_boltzmann_update_gpu(); /** Perform a full initialization of the lattice Boltzmann system. 
@@ -188,14 +180,15 @@ void lb_reinit_fluid_gpu(); /** Reset the forces on the fluid nodes */ void reset_LB_force_densities_GPU(bool buffer = true); -void lb_init_GPU(LB_parameters_gpu *lbpar_gpu); +void lb_init_GPU(const LB_parameters_gpu &lbpar_gpu); void lb_integrate_GPU(); void lb_get_values_GPU(LB_rho_v_pi_gpu *host_values); -void lb_print_node_GPU(int single_nodeindex, +void lb_print_node_GPU(unsigned single_nodeindex, LB_rho_v_pi_gpu *host_print_values); #ifdef LB_BOUNDARIES_GPU -void lb_init_boundaries_GPU(int n_lb_boundaries, int number_of_boundnodes, +void lb_init_boundaries_GPU(std::size_t n_lb_boundaries, + unsigned number_of_boundnodes, int *host_boundary_node_list, int *host_boundary_index_list, float *lb_bounday_velocity); @@ -208,16 +201,17 @@ void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction); void lb_calc_fluid_mass_GPU(double *mass); void lb_calc_fluid_momentum_GPU(double *host_mom); -void lb_get_boundary_flag_GPU(int single_nodeindex, unsigned int *host_flag); +void lb_get_boundary_flag_GPU(unsigned int single_nodeindex, + unsigned int *host_flag); void lb_get_boundary_flags_GPU(unsigned int *host_bound_array); -void lb_set_node_velocity_GPU(int single_nodeindex, float *host_velocity); -void lb_set_node_rho_GPU(int single_nodeindex, float host_rho); +void lb_set_node_velocity_GPU(unsigned single_nodeindex, float *host_velocity); +void lb_set_node_rho_GPU(unsigned single_nodeindex, float host_rho); void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu); void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu); void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu); -void lb_gpu_get_boundary_forces(double *forces); +void lb_gpu_get_boundary_forces(std::vector &forces); void lb_save_checkpoint_GPU(float *host_checkpoint_vd); void lb_load_checkpoint_GPU(float const *host_checkpoint_vd); @@ -236,9 +230,17 @@ uint64_t lb_fluid_get_rng_state_gpu(); void lb_fluid_set_rng_state_gpu(uint64_t counter); uint64_t 
lb_coupling_get_rng_state_gpu(); void lb_coupling_set_rng_state_gpu(uint64_t counter); + +/** Calculate the node index from its coordinates */ +inline unsigned int calculate_node_index(LB_parameters_gpu const &lbpar, + Utils::Vector3i const &coord) { + return static_cast(Utils::get_linear_index( + coord, Utils::Vector3i{static_cast(lbpar.dim_x), + static_cast(lbpar.dim_y), + static_cast(lbpar.dim_z)})); +} /**@}*/ -extern OptionalCounter rng_counter_fluid_gpu; -extern OptionalCounter rng_counter_coupling_gpu; + #endif /* CUDA */ -#endif /* CUDA_H */ +#endif /* LBGPU_HPP */ diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu index edc73900c90..f9b18c80851 100644 --- a/src/core/grid_based_algorithms/lbgpu_cuda.cu +++ b/src/core/grid_based_algorithms/lbgpu_cuda.cu @@ -33,12 +33,12 @@ #include "grid_based_algorithms/lbgpu.hpp" #include "cuda_interface.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "errorhandling.hpp" +#include "lbgpu.hpp" #include #include -#include #include #include @@ -49,6 +49,7 @@ #include #include +#include #include #include #include @@ -58,13 +59,13 @@ extern int this_node; -/** device_rho_v: struct for hydrodynamic fields: this is for internal use - * (i.e. stores values in LB units) and should not used for +/** struct for hydrodynamic fields: this is for internal use + * (i.e. stores values in LB units) and should not be used for * printing values */ static LB_rho_v_gpu *device_rho_v = nullptr; -/** print_rho_v_pi: struct for hydrodynamic fields: this is the interface +/** struct for hydrodynamic fields: this is the interface * and stores values in MD units. It should not be used * as an input for any LB calculations. 
TODO: in the future, * one might want to have several structures for printing @@ -97,11 +98,8 @@ LB_node_force_density_gpu node_f = { static float *lb_boundary_force = nullptr; #endif -/** @name pointers for additional cuda check flag */ -/**@{*/ -static int *gpu_check = nullptr; -static int *h_gpu_check = nullptr; -/**@}*/ +/** @brief Whether LB GPU was initialized */ +static bool *device_gpu_lb_initialized = nullptr; /** @brief Direction of data transfer between @ref nodes_a and @ref nodes_b * during integration in @ref lb_integrate_GPU @@ -117,11 +115,8 @@ static size_t size_of_rho_v_pi; /** Parameters residing in constant memory */ __device__ __constant__ LB_parameters_gpu para[1]; -/*********************************************************/ -/** \name device functions called by kernel functions */ -/*********************************************************/ - static constexpr float sqrt12 = 3.4641016151377544f; +static constexpr unsigned int threads_per_block = 64; OptionalCounter rng_counter_coupling_gpu; OptionalCounter rng_counter_fluid_gpu; @@ -141,139 +136,152 @@ template __device__ uint3 index_to_xyz(T index) { * @param[in] x,y,z The xyz array */ template __device__ T xyz_to_index(T x, T y, T z) { - return x + para->dim_x * (y + para->dim_y * z); + return x + + static_cast(para->dim_x) * (y + static_cast(para->dim_y) * z); } __device__ __inline__ float calc_mode_x_from_n(LB_nodes_gpu n_a, unsigned int index, int x) { - auto const flat_index = [&index](int population) { + auto const flat_index = [&index](unsigned population) { return population * para->number_of_nodes + index; }; switch (x) { case 0: - return n_a.vd[flat_index(0)] + n_a.vd[flat_index(1)] + - n_a.vd[flat_index(2)] + n_a.vd[flat_index(3)] + - n_a.vd[flat_index(4)] + n_a.vd[flat_index(5)] + - n_a.vd[flat_index(6)] + n_a.vd[flat_index(7)] + - n_a.vd[flat_index(8)] + n_a.vd[flat_index(9)] + - n_a.vd[flat_index(10)] + n_a.vd[flat_index(11)] + - n_a.vd[flat_index(12)] + n_a.vd[flat_index(13)] + 
- n_a.vd[flat_index(14)] + n_a.vd[flat_index(15)] + - n_a.vd[flat_index(16)] + n_a.vd[flat_index(17)] + - n_a.vd[flat_index(18)]; + return n_a.populations[flat_index(0)] + n_a.populations[flat_index(1)] + + n_a.populations[flat_index(2)] + n_a.populations[flat_index(3)] + + n_a.populations[flat_index(4)] + n_a.populations[flat_index(5)] + + n_a.populations[flat_index(6)] + n_a.populations[flat_index(7)] + + n_a.populations[flat_index(8)] + n_a.populations[flat_index(9)] + + n_a.populations[flat_index(10)] + n_a.populations[flat_index(11)] + + n_a.populations[flat_index(12)] + n_a.populations[flat_index(13)] + + n_a.populations[flat_index(14)] + n_a.populations[flat_index(15)] + + n_a.populations[flat_index(16)] + n_a.populations[flat_index(17)] + + n_a.populations[flat_index(18)]; case 1: - return (n_a.vd[flat_index(1)] - n_a.vd[flat_index(2)]) + - (n_a.vd[flat_index(7)] - n_a.vd[flat_index(8)]) + - (n_a.vd[flat_index(9)] - n_a.vd[flat_index(10)]) + - (n_a.vd[flat_index(11)] - n_a.vd[flat_index(12)]) + - (n_a.vd[flat_index(13)] - n_a.vd[flat_index(14)]); + return (n_a.populations[flat_index(1)] - n_a.populations[flat_index(2)]) + + (n_a.populations[flat_index(7)] - n_a.populations[flat_index(8)]) + + (n_a.populations[flat_index(9)] - n_a.populations[flat_index(10)]) + + (n_a.populations[flat_index(11)] - n_a.populations[flat_index(12)]) + + (n_a.populations[flat_index(13)] - n_a.populations[flat_index(14)]); case 2: - return (n_a.vd[flat_index(3)] - n_a.vd[flat_index(4)]) + - (n_a.vd[flat_index(7)] - n_a.vd[flat_index(8)]) - - (n_a.vd[flat_index(9)] - n_a.vd[flat_index(10)]) + - (n_a.vd[flat_index(15)] - n_a.vd[flat_index(16)]) + - (n_a.vd[flat_index(17)] - n_a.vd[flat_index(18)]); + return (n_a.populations[flat_index(3)] - n_a.populations[flat_index(4)]) + + (n_a.populations[flat_index(7)] - n_a.populations[flat_index(8)]) - + (n_a.populations[flat_index(9)] - n_a.populations[flat_index(10)]) + + (n_a.populations[flat_index(15)] - n_a.populations[flat_index(16)]) 
+ + (n_a.populations[flat_index(17)] - n_a.populations[flat_index(18)]); case 3: - return (n_a.vd[flat_index(5)] - n_a.vd[flat_index(6)]) + - (n_a.vd[flat_index(11)] - n_a.vd[flat_index(12)]) - - (n_a.vd[flat_index(13)] - n_a.vd[flat_index(14)]) + - (n_a.vd[flat_index(15)] - n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] - n_a.vd[flat_index(18)]); + return (n_a.populations[flat_index(5)] - n_a.populations[flat_index(6)]) + + (n_a.populations[flat_index(11)] - n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] - n_a.populations[flat_index(14)]) + + (n_a.populations[flat_index(15)] - n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] - n_a.populations[flat_index(18)]); case 4: - return -n_a.vd[flat_index(0)] + n_a.vd[flat_index(7)] + - n_a.vd[flat_index(8)] + n_a.vd[flat_index(9)] + - n_a.vd[flat_index(10)] + n_a.vd[flat_index(11)] + - n_a.vd[flat_index(12)] + n_a.vd[flat_index(13)] + - n_a.vd[flat_index(14)] + n_a.vd[flat_index(15)] + - n_a.vd[flat_index(16)] + n_a.vd[flat_index(17)] + - n_a.vd[flat_index(18)]; + return -n_a.populations[flat_index(0)] + n_a.populations[flat_index(7)] + + n_a.populations[flat_index(8)] + n_a.populations[flat_index(9)] + + n_a.populations[flat_index(10)] + n_a.populations[flat_index(11)] + + n_a.populations[flat_index(12)] + n_a.populations[flat_index(13)] + + n_a.populations[flat_index(14)] + n_a.populations[flat_index(15)] + + n_a.populations[flat_index(16)] + n_a.populations[flat_index(17)] + + n_a.populations[flat_index(18)]; case 5: - return (n_a.vd[flat_index(1)] + n_a.vd[flat_index(2)]) - - (n_a.vd[flat_index(3)] + n_a.vd[flat_index(4)]) + - (n_a.vd[flat_index(11)] + n_a.vd[flat_index(12)]) + - (n_a.vd[flat_index(13)] + n_a.vd[flat_index(14)]) - - (n_a.vd[flat_index(15)] + n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] + n_a.vd[flat_index(18)]); + return (n_a.populations[flat_index(1)] + n_a.populations[flat_index(2)]) - + (n_a.populations[flat_index(3)] + 
n_a.populations[flat_index(4)]) + + (n_a.populations[flat_index(11)] + n_a.populations[flat_index(12)]) + + (n_a.populations[flat_index(13)] + n_a.populations[flat_index(14)]) - + (n_a.populations[flat_index(15)] + n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] + n_a.populations[flat_index(18)]); case 6: - return (n_a.vd[flat_index(1)] + n_a.vd[flat_index(2)]) + - (n_a.vd[flat_index(3)] + n_a.vd[flat_index(4)]) - - (n_a.vd[flat_index(11)] + n_a.vd[flat_index(12)]) - - (n_a.vd[flat_index(13)] + n_a.vd[flat_index(14)]) - - (n_a.vd[flat_index(15)] + n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] + n_a.vd[flat_index(18)]) - - 2.0f * ((n_a.vd[flat_index(5)] + n_a.vd[flat_index(6)]) - - (n_a.vd[flat_index(7)] + n_a.vd[flat_index(8)]) - - (n_a.vd[flat_index(9)] + n_a.vd[flat_index(10)])); + return (n_a.populations[flat_index(1)] + n_a.populations[flat_index(2)]) + + (n_a.populations[flat_index(3)] + n_a.populations[flat_index(4)]) - + (n_a.populations[flat_index(11)] + n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] + n_a.populations[flat_index(14)]) - + (n_a.populations[flat_index(15)] + n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] + n_a.populations[flat_index(18)]) - + 2.0f * ((n_a.populations[flat_index(5)] + + n_a.populations[flat_index(6)]) - + (n_a.populations[flat_index(7)] + + n_a.populations[flat_index(8)]) - + (n_a.populations[flat_index(9)] + + n_a.populations[flat_index(10)])); case 7: - return (n_a.vd[flat_index(7)] + n_a.vd[flat_index(8)]) - - (n_a.vd[flat_index(9)] + n_a.vd[flat_index(10)]); + return (n_a.populations[flat_index(7)] + n_a.populations[flat_index(8)]) - + (n_a.populations[flat_index(9)] + n_a.populations[flat_index(10)]); case 8: - return (n_a.vd[flat_index(11)] + n_a.vd[flat_index(12)]) - - (n_a.vd[flat_index(13)] + n_a.vd[flat_index(14)]); + return (n_a.populations[flat_index(11)] + n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] + 
n_a.populations[flat_index(14)]); case 9: - return (n_a.vd[flat_index(15)] + n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] + n_a.vd[flat_index(18)]); + return (n_a.populations[flat_index(15)] + n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] + n_a.populations[flat_index(18)]); case 10: - return -2.0f * (n_a.vd[flat_index(1)] - n_a.vd[flat_index(2)]) + - (n_a.vd[flat_index(7)] - n_a.vd[flat_index(8)]) + - (n_a.vd[flat_index(9)] - n_a.vd[flat_index(10)]) + - (n_a.vd[flat_index(11)] - n_a.vd[flat_index(12)]) + - (n_a.vd[flat_index(13)] - n_a.vd[flat_index(14)]); + return -2.0f * (n_a.populations[flat_index(1)] - + n_a.populations[flat_index(2)]) + + (n_a.populations[flat_index(7)] - n_a.populations[flat_index(8)]) + + (n_a.populations[flat_index(9)] - n_a.populations[flat_index(10)]) + + (n_a.populations[flat_index(11)] - n_a.populations[flat_index(12)]) + + (n_a.populations[flat_index(13)] - n_a.populations[flat_index(14)]); case 11: - return -2.0f * (n_a.vd[flat_index(3)] - n_a.vd[flat_index(4)]) + - (n_a.vd[flat_index(7)] - n_a.vd[flat_index(8)]) - - (n_a.vd[flat_index(9)] - n_a.vd[flat_index(10)]) + - (n_a.vd[flat_index(15)] - n_a.vd[flat_index(16)]) + - (n_a.vd[flat_index(17)] - n_a.vd[flat_index(18)]); + return -2.0f * (n_a.populations[flat_index(3)] - + n_a.populations[flat_index(4)]) + + (n_a.populations[flat_index(7)] - n_a.populations[flat_index(8)]) - + (n_a.populations[flat_index(9)] - n_a.populations[flat_index(10)]) + + (n_a.populations[flat_index(15)] - n_a.populations[flat_index(16)]) + + (n_a.populations[flat_index(17)] - n_a.populations[flat_index(18)]); case 12: - return -2.0f * (n_a.vd[flat_index(5)] - n_a.vd[flat_index(6)]) + - (n_a.vd[flat_index(11)] - n_a.vd[flat_index(12)]) - - (n_a.vd[flat_index(13)] - n_a.vd[flat_index(14)]) + - (n_a.vd[flat_index(15)] - n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] - n_a.vd[flat_index(18)]); + return -2.0f * (n_a.populations[flat_index(5)] - + n_a.populations[flat_index(6)]) 
+ + (n_a.populations[flat_index(11)] - n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] - n_a.populations[flat_index(14)]) + + (n_a.populations[flat_index(15)] - n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] - n_a.populations[flat_index(18)]); case 13: - return (n_a.vd[flat_index(7)] - n_a.vd[flat_index(8)]) + - (n_a.vd[flat_index(9)] - n_a.vd[flat_index(10)]) - - (n_a.vd[flat_index(11)] - n_a.vd[flat_index(12)]) - - (n_a.vd[flat_index(13)] - n_a.vd[flat_index(14)]); + return (n_a.populations[flat_index(7)] - n_a.populations[flat_index(8)]) + + (n_a.populations[flat_index(9)] - n_a.populations[flat_index(10)]) - + (n_a.populations[flat_index(11)] - n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] - n_a.populations[flat_index(14)]); case 14: - return (n_a.vd[flat_index(7)] - n_a.vd[flat_index(8)]) - - (n_a.vd[flat_index(9)] - n_a.vd[flat_index(10)]) - - (n_a.vd[flat_index(15)] - n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] - n_a.vd[flat_index(18)]); + return (n_a.populations[flat_index(7)] - n_a.populations[flat_index(8)]) - + (n_a.populations[flat_index(9)] - n_a.populations[flat_index(10)]) - + (n_a.populations[flat_index(15)] - n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] - n_a.populations[flat_index(18)]); case 15: - return (n_a.vd[flat_index(11)] - n_a.vd[flat_index(12)]) - - (n_a.vd[flat_index(13)] - n_a.vd[flat_index(14)]) - - (n_a.vd[flat_index(15)] - n_a.vd[flat_index(16)]) + - (n_a.vd[flat_index(17)] - n_a.vd[flat_index(18)]); + return (n_a.populations[flat_index(11)] - n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] - n_a.populations[flat_index(14)]) - + (n_a.populations[flat_index(15)] - n_a.populations[flat_index(16)]) + + (n_a.populations[flat_index(17)] - n_a.populations[flat_index(18)]); case 16: - return n_a.vd[flat_index(0)] + n_a.vd[flat_index(7)] + - n_a.vd[flat_index(8)] + n_a.vd[flat_index(9)] + - n_a.vd[flat_index(10)] + 
n_a.vd[flat_index(11)] + - n_a.vd[flat_index(12)] + n_a.vd[flat_index(13)] + - n_a.vd[flat_index(14)] + n_a.vd[flat_index(15)] + - n_a.vd[flat_index(16)] + n_a.vd[flat_index(17)] + - n_a.vd[flat_index(18)] - - 2.0f * ((n_a.vd[flat_index(1)] + n_a.vd[flat_index(2)]) + - (n_a.vd[flat_index(3)] + n_a.vd[flat_index(4)]) + - (n_a.vd[flat_index(5)] + n_a.vd[flat_index(6)])); + return n_a.populations[flat_index(0)] + n_a.populations[flat_index(7)] + + n_a.populations[flat_index(8)] + n_a.populations[flat_index(9)] + + n_a.populations[flat_index(10)] + n_a.populations[flat_index(11)] + + n_a.populations[flat_index(12)] + n_a.populations[flat_index(13)] + + n_a.populations[flat_index(14)] + n_a.populations[flat_index(15)] + + n_a.populations[flat_index(16)] + n_a.populations[flat_index(17)] + + n_a.populations[flat_index(18)] - + 2.0f * ((n_a.populations[flat_index(1)] + + n_a.populations[flat_index(2)]) + + (n_a.populations[flat_index(3)] + + n_a.populations[flat_index(4)]) + + (n_a.populations[flat_index(5)] + + n_a.populations[flat_index(6)])); case 17: - return -(n_a.vd[flat_index(1)] + n_a.vd[flat_index(2)]) + - (n_a.vd[flat_index(3)] + n_a.vd[flat_index(4)]) + - (n_a.vd[flat_index(11)] + n_a.vd[flat_index(12)]) + - (n_a.vd[flat_index(13)] + n_a.vd[flat_index(14)]) - - (n_a.vd[flat_index(15)] + n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] + n_a.vd[flat_index(18)]); + return -(n_a.populations[flat_index(1)] + n_a.populations[flat_index(2)]) + + (n_a.populations[flat_index(3)] + n_a.populations[flat_index(4)]) + + (n_a.populations[flat_index(11)] + n_a.populations[flat_index(12)]) + + (n_a.populations[flat_index(13)] + n_a.populations[flat_index(14)]) - + (n_a.populations[flat_index(15)] + n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] + n_a.populations[flat_index(18)]); case 18: - return -(n_a.vd[flat_index(1)] + n_a.vd[flat_index(2)]) - - (n_a.vd[flat_index(3)] + n_a.vd[flat_index(4)]) - - (n_a.vd[flat_index(11)] + n_a.vd[flat_index(12)]) 
- - (n_a.vd[flat_index(13)] + n_a.vd[flat_index(14)]) - - (n_a.vd[flat_index(15)] + n_a.vd[flat_index(16)]) - - (n_a.vd[flat_index(17)] + n_a.vd[flat_index(18)]) + - 2.0f * ((n_a.vd[flat_index(5)] + n_a.vd[flat_index(6)]) + - (n_a.vd[flat_index(7)] + n_a.vd[flat_index(8)]) + - (n_a.vd[flat_index(9)] + n_a.vd[flat_index(10)])); + return -(n_a.populations[flat_index(1)] + n_a.populations[flat_index(2)]) - + (n_a.populations[flat_index(3)] + n_a.populations[flat_index(4)]) - + (n_a.populations[flat_index(11)] + n_a.populations[flat_index(12)]) - + (n_a.populations[flat_index(13)] + n_a.populations[flat_index(14)]) - + (n_a.populations[flat_index(15)] + n_a.populations[flat_index(16)]) - + (n_a.populations[flat_index(17)] + n_a.populations[flat_index(18)]) + + 2.0f * ((n_a.populations[flat_index(5)] + + n_a.populations[flat_index(6)]) + + (n_a.populations[flat_index(7)] + + n_a.populations[flat_index(8)]) + + (n_a.populations[flat_index(9)] + + n_a.populations[flat_index(10)])); } return 0.0; } @@ -425,26 +433,22 @@ reset_LB_force_densities_kernel(LB_node_force_density_gpu node_f, } void reset_LB_force_densities_GPU(bool buffer) { - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(reset_LB_force_densities_kernel, dim_grid, threads_per_block, node_f, buffer); } /** - * @param[in] mode Local register values mode + * @param[in] modes Local register values modes * @param[in] index Node index / thread index * @param[in] node_f Local node force * @param[out] d_v Local device values */ -__device__ void update_rho_v(Utils::Array const &mode, +__device__ void update_rho_v(Utils::Array const &modes, unsigned int index, - LB_node_force_density_gpu 
node_f, + LB_node_force_density_gpu const &node_f, LB_rho_v_gpu *d_v) { float Rho_tot = 0.0f; float u_tot[3] = {0.0f, 0.0f, 0.0f}; @@ -453,11 +457,11 @@ __device__ void update_rho_v(Utils::Array const &mode, * remember that the populations are stored as differences to their * equilibrium value */ - d_v[index].rho = mode[0] + para->rho; - Rho_tot += mode[0] + para->rho; - u_tot[0] += mode[1]; - u_tot[1] += mode[2]; - u_tot[2] += mode[3]; + d_v[index].rho = modes[0] + para->rho; + Rho_tot += modes[0] + para->rho; + u_tot[0] += modes[1]; + u_tot[1] += modes[2]; + u_tot[2] += modes[3]; /** If forces are present, the momentum density is redefined to * include one half-step of the force action. See the @@ -503,7 +507,7 @@ __device__ void relax_modes(Utils::Array &mode, unsigned int index, j[1] = Rho * u_tot[1]; j[2] = Rho * u_tot[2]; - /** equilibrium part of the stress modes (eq13 schiller) */ + /* equilibrium part of the stress modes (eq13 schiller) */ modes_from_pi_eq[0] = ((j[0] * j[0]) + (j[1] * j[1]) + (j[2] * j[2])) / Rho; modes_from_pi_eq[1] = ((j[0] * j[0]) - (j[1] * j[1])) / Rho; @@ -514,7 +518,7 @@ __device__ void relax_modes(Utils::Array &mode, unsigned int index, modes_from_pi_eq[4] = j[0] * j[2] / Rho; modes_from_pi_eq[5] = j[1] * j[2] / Rho; - /** relax the stress modes (eq14 schiller) */ + /* relax the stress modes (eq14 schiller) */ mode[4] = modes_from_pi_eq[0] + para->gamma_bulk * (mode[4] - modes_from_pi_eq[0]); @@ -546,6 +550,7 @@ __device__ void relax_modes(Utils::Array &mode, unsigned int index, /** Thermalization of the modes with Gaussian random numbers * @param[in] index Node index / thread index * @param[in,out] mode Local register values mode + * @param[in] philox_counter Philox counter */ __device__ void thermalize_modes(Utils::Array &mode, unsigned int index, uint64_t philox_counter) { @@ -655,130 +660,141 @@ __device__ void calc_n_from_modes_push(LB_nodes_gpu n_b, unsigned int y = xyz.y; unsigned int z = xyz.z; - n_b.vd[0 * 
para->number_of_nodes + x + para->dim_x * y + - para->dim_x * para->dim_y * z] = + n_b.populations[0 * para->number_of_nodes + x + para->dim_x * y + + para->dim_x * para->dim_y * z] = 1.0f / 3.0f * (mode[0] - mode[4] + mode[16]); - n_b.vd[1 * para->number_of_nodes + (x + 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * z] = + n_b.populations[1 * para->number_of_nodes + (x + 1) % para->dim_x + + para->dim_x * y + para->dim_x * para->dim_y * z] = 1.0f / 18.0f * (mode[0] + mode[1] + mode[5] + mode[6] - mode[17] - mode[18] - 2.0f * (mode[10] + mode[16])); - n_b.vd[2 * para->number_of_nodes + (para->dim_x + x - 1) % para->dim_x + - para->dim_x * y + para->dim_x * para->dim_y * z] = + n_b.populations[2 * para->number_of_nodes + + (para->dim_x + x - 1) % para->dim_x + para->dim_x * y + + para->dim_x * para->dim_y * z] = 1.0f / 18.0f * (mode[0] - mode[1] + mode[5] + mode[6] - mode[17] - mode[18] + 2.0f * (mode[10] - mode[16])); - n_b.vd[3 * para->number_of_nodes + x + para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * z] = + n_b.populations[3 * para->number_of_nodes + x + + para->dim_x * ((y + 1) % para->dim_y) + + para->dim_x * para->dim_y * z] = 1.0f / 18.0f * (mode[0] + mode[2] - mode[5] + mode[6] + mode[17] - mode[18] - 2.0f * (mode[11] + mode[16])); - n_b.vd[4 * para->number_of_nodes + x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * z] = + n_b.populations[4 * para->number_of_nodes + x + + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + + para->dim_x * para->dim_y * z] = 1.0f / 18.0f * (mode[0] - mode[2] - mode[5] + mode[6] + mode[17] - mode[18] + 2.0f * (mode[11] - mode[16])); - n_b.vd[5 * para->number_of_nodes + x + para->dim_x * y + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = + n_b.populations[5 * para->number_of_nodes + x + para->dim_x * y + + para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = 1.0f / 18.0f * (mode[0] + mode[3] - 2.0f * (mode[6] + mode[12] + 
mode[16] - mode[18])); - n_b.vd[6 * para->number_of_nodes + x + para->dim_x * y + - para->dim_x * para->dim_y * ((para->dim_z + z - 1) % para->dim_z)] = + n_b.populations[6 * para->number_of_nodes + x + para->dim_x * y + + para->dim_x * para->dim_y * + ((para->dim_z + z - 1) % para->dim_z)] = 1.0f / 18.0f * (mode[0] - mode[3] - 2.0f * (mode[6] - mode[12] + mode[16] - mode[18])); - n_b.vd[7 * para->number_of_nodes + (x + 1) % para->dim_x + - para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * z] = + n_b.populations[7 * para->number_of_nodes + (x + 1) % para->dim_x + + para->dim_x * ((y + 1) % para->dim_y) + + para->dim_x * para->dim_y * z] = 1.0f / 36.0f * (mode[0] + mode[1] + mode[2] + mode[4] + 2.0f * mode[6] + mode[7] + mode[10] + mode[11] + mode[13] + mode[14] + mode[16] + 2.0f * mode[18]); - n_b.vd[8 * para->number_of_nodes + (para->dim_x + x - 1) % para->dim_x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * z] = + n_b.populations[8 * para->number_of_nodes + + (para->dim_x + x - 1) % para->dim_x + + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + + para->dim_x * para->dim_y * z] = 1.0f / 36.0f * (mode[0] - mode[1] - mode[2] + mode[4] + 2.0f * mode[6] + mode[7] - mode[10] - mode[11] - mode[13] - mode[14] + mode[16] + 2.0f * mode[18]); - n_b.vd[9 * para->number_of_nodes + (x + 1) % para->dim_x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * z] = + n_b.populations[9 * para->number_of_nodes + (x + 1) % para->dim_x + + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + + para->dim_x * para->dim_y * z] = 1.0f / 36.0f * (mode[0] + mode[1] - mode[2] + mode[4] + 2.0f * mode[6] - mode[7] + mode[10] - mode[11] + mode[13] - mode[14] + mode[16] + 2.0f * mode[18]); - n_b.vd[10 * para->number_of_nodes + (para->dim_x + x - 1) % para->dim_x + - para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * z] = + n_b.populations[10 * para->number_of_nodes + + 
(para->dim_x + x - 1) % para->dim_x + + para->dim_x * ((y + 1) % para->dim_y) + + para->dim_x * para->dim_y * z] = 1.0f / 36.0f * (mode[0] - mode[1] + mode[2] + mode[4] + 2.0f * mode[6] - mode[7] - mode[10] + mode[11] - mode[13] + mode[14] + mode[16] + 2.0f * mode[18]); - n_b.vd[11 * para->number_of_nodes + (x + 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = + n_b.populations[11 * para->number_of_nodes + (x + 1) % para->dim_x + + para->dim_x * y + + para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] + mode[1] + mode[3] + mode[4] + mode[5] - mode[6] + mode[8] + mode[10] + mode[12] - mode[13] + mode[15] + mode[16] + mode[17] - mode[18]); - n_b.vd[12 * para->number_of_nodes + (para->dim_x + x - 1) % para->dim_x + - para->dim_x * y + - para->dim_x * para->dim_y * ((para->dim_z + z - 1) % para->dim_z)] = + n_b.populations[12 * para->number_of_nodes + + (para->dim_x + x - 1) % para->dim_x + para->dim_x * y + + para->dim_x * para->dim_y * + ((para->dim_z + z - 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] - mode[1] - mode[3] + mode[4] + mode[5] - mode[6] + mode[8] - mode[10] - mode[12] + mode[13] - mode[15] + mode[16] + mode[17] - mode[18]); - n_b.vd[13 * para->number_of_nodes + (x + 1) % para->dim_x + para->dim_x * y + - para->dim_x * para->dim_y * ((para->dim_z + z - 1) % para->dim_z)] = + n_b.populations[13 * para->number_of_nodes + (x + 1) % para->dim_x + + para->dim_x * y + + para->dim_x * para->dim_y * + ((para->dim_z + z - 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] + mode[1] - mode[3] + mode[4] + mode[5] - mode[6] - mode[8] + mode[10] - mode[12] - mode[13] - mode[15] + mode[16] + mode[17] - mode[18]); - n_b.vd[14 * para->number_of_nodes + (para->dim_x + x - 1) % para->dim_x + - para->dim_x * y + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = + n_b.populations[14 * para->number_of_nodes + + (para->dim_x + x - 1) % para->dim_x + para->dim_x * y + + para->dim_x * para->dim_y * ((z + 
1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] - mode[1] + mode[3] + mode[4] + mode[5] - mode[6] - mode[8] - mode[10] + mode[12] + mode[13] + mode[15] + mode[16] + mode[17] - mode[18]); - n_b.vd[15 * para->number_of_nodes + x + - para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = + n_b.populations[15 * para->number_of_nodes + x + + para->dim_x * ((y + 1) % para->dim_y) + + para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] + mode[2] + mode[3] + mode[4] - mode[5] - mode[6] + mode[9] + mode[11] + mode[12] - mode[14] - mode[15] + mode[16] - mode[17] - mode[18]); - n_b.vd[16 * para->number_of_nodes + x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * ((para->dim_z + z - 1) % para->dim_z)] = + n_b.populations[16 * para->number_of_nodes + x + + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + + para->dim_x * para->dim_y * + ((para->dim_z + z - 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] - mode[2] - mode[3] + mode[4] - mode[5] - mode[6] + mode[9] - mode[11] - mode[12] + mode[14] + mode[15] + mode[16] - mode[17] - mode[18]); - n_b.vd[17 * para->number_of_nodes + x + - para->dim_x * ((y + 1) % para->dim_y) + - para->dim_x * para->dim_y * ((para->dim_z + z - 1) % para->dim_z)] = + n_b.populations[17 * para->number_of_nodes + x + + para->dim_x * ((y + 1) % para->dim_y) + + para->dim_x * para->dim_y * + ((para->dim_z + z - 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] + mode[2] - mode[3] + mode[4] - mode[5] - mode[6] - mode[9] + mode[11] - mode[12] - mode[14] + mode[15] + mode[16] - mode[17] - mode[18]); - n_b.vd[18 * para->number_of_nodes + x + - para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + - para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = + n_b.populations[18 * para->number_of_nodes + x + + para->dim_x * ((para->dim_y + y - 1) % para->dim_y) + + para->dim_x * para->dim_y * ((z + 1) % para->dim_z)] = 1.0f / 36.0f * (mode[0] - mode[2] + mode[3] + 
mode[4] - mode[5] - mode[6] - mode[9] - mode[11] + mode[12] + mode[14] - mode[15] + mode[16] - mode[17] - @@ -804,7 +820,7 @@ __device__ void bounce_back_boundaries(LB_nodes_gpu n_curr, float shift, weight, pop_to_bounce_back; float boundary_force[3] = {0.0f, 0.0f, 0.0f}; size_t to_index, to_index_x, to_index_y, to_index_z; - int population, inverse; + unsigned population, inverse; if (boundaries.index[index] != 0) { auto const v = boundaries.velocity[index]; @@ -815,7 +831,7 @@ __device__ void bounce_back_boundaries(LB_nodes_gpu n_curr, unsigned int y = xyz.y; unsigned int z = xyz.z; - /* store vd temporary in second lattice to avoid race conditions */ + /* store populations temporary in second lattice to avoid race conditions */ // TODO : PUT IN EQUILIBRIUM CONTRIBUTION TO THE BOUNCE-BACK DENSITY FOR THE // BOUNDARY FORCE @@ -827,10 +843,11 @@ __device__ void bounce_back_boundaries(LB_nodes_gpu n_curr, shift = 2.0f / para->agrid * para->rho * 3.0f * weight * para->tau * \ (v[0] * static_cast(c[0]) + v[1] * static_cast(c[1]) + \ v[2] * static_cast(c[2])); \ - pop_to_bounce_back = n_curr.vd[population * para->number_of_nodes + index]; \ - to_index_x = (x + c[0] + para->dim_x) % para->dim_x; \ - to_index_y = (y + c[1] + para->dim_y) % para->dim_y; \ - to_index_z = (z + c[2] + para->dim_z) % para->dim_z; \ + pop_to_bounce_back = \ + n_curr.populations[population * para->number_of_nodes + index]; \ + to_index_x = (x + static_cast(c[0]) + para->dim_x) % para->dim_x; \ + to_index_y = (y + static_cast(c[1]) + para->dim_y) % para->dim_y; \ + to_index_z = (z + static_cast(c[2]) + para->dim_z) % para->dim_z; \ to_index = to_index_x + para->dim_x * to_index_y + \ para->dim_x * para->dim_y * to_index_z; \ if (n_curr.boundary[to_index] == 0) { \ @@ -840,7 +857,7 @@ __device__ void bounce_back_boundaries(LB_nodes_gpu n_curr, (2.0f * pop_to_bounce_back + shift) * static_cast(c[1]); \ boundary_force[2] += \ (2.0f * pop_to_bounce_back + shift) * static_cast(c[2]); \ - 
n_curr.vd[inverse * para->number_of_nodes + to_index] = \ + n_curr.populations[inverse * para->number_of_nodes + to_index] = \ pop_to_bounce_back + shift; \ } @@ -1063,9 +1080,82 @@ __device__ void apply_forces(unsigned int index, Utils::Array &mode, reset_LB_force_densities(index, node_f); } +__device__ Utils::Array +stress_modes(LB_rho_v_gpu const &rho_v, const Utils::Array &modes) { + /* note that d_v[index].v[] already includes the 1/2 f term, accounting + * for the pre- and post-collisional average + */ + auto const density = rho_v.rho; + Utils::Array j{density * rho_v.v[0], density * rho_v.v[1], + density * rho_v.v[2]}; + // equilibrium part of the stress modes, which comes from + // the equality between modes and stress tensor components + + /* m4 = trace(pi) - rho + m5 = pi_xx - pi_yy + m6 = trace(pi) - 3 pi_zz + m7 = pi_xy + m8 = pi_xz + m9 = pi_yz */ + + // and plugging in the Euler stress for the equilibrium: + // pi_eq = rho_0*c_s^2*I3 + (j \otimes j)/rho + // with I3 the 3D identity matrix and + // rho = \trace(rho_0*c_s^2*I3), which yields + + /* m4_from_pi_eq = j.j + m5_from_pi_eq = j_x*j_x - j_y*j_y + m6_from_pi_eq = j.j - 3*j_z*j_z + m7_from_pi_eq = j_x*j_y + m8_from_pi_eq = j_x*j_z + m9_from_pi_eq = j_y*j_z */ + + // where the / density term has been dropped. We thus obtain: + /* Now we must predict the outcome of the next collision */ + /* We immediately average pre- and post-collision. */ + /* TODO: need a reference for this. 
*/ + Utils::Array modes_from_pi_eq{ + (j[0] * j[0] + j[1] * j[1] + j[2] * j[2]) / density, + (j[0] * j[0] - j[1] * j[1]) / density, + (j[0] * j[0] + j[1] * j[1] + j[2] * j[2] - 3.0f * j[2] * j[2]) / density, + j[0] * j[1] / density, + j[0] * j[2] / density, + j[1] * j[2] / density}; + auto res = modes; + res[4] = modes_from_pi_eq[0] + + (0.5f + 0.5f * para->gamma_bulk) * (modes[4] - modes_from_pi_eq[0]); + res[5] = modes_from_pi_eq[1] + + (0.5f + 0.5f * para->gamma_shear) * (modes[5] - modes_from_pi_eq[1]); + res[6] = modes_from_pi_eq[2] + + (0.5f + 0.5f * para->gamma_shear) * (modes[6] - modes_from_pi_eq[2]); + res[7] = modes_from_pi_eq[3] + + (0.5f + 0.5f * para->gamma_shear) * (modes[7] - modes_from_pi_eq[3]); + res[8] = modes_from_pi_eq[4] + + (0.5f + 0.5f * para->gamma_shear) * (modes[8] - modes_from_pi_eq[4]); + res[9] = modes_from_pi_eq[5] + + (0.5f + 0.5f * para->gamma_shear) * (modes[9] - modes_from_pi_eq[5]); + return res; +} + +// Transform the stress tensor components according to the modes that +// correspond to those used by U. Schiller. In terms of populations this +// expression then corresponds exactly to those in eq. (116)-(121) in +// @cite dunweg07a, when these are written out in populations. +// But to ensure this, the expression in Schiller's modes has to be +// different! 
+__device__ Utils::Array +stress_from_stress_modes(Utils::Array const &modes) { + return {(2.0f * (modes[0] + modes[4]) + modes[6] + 3.0f * modes[5]) / 6.0f, + modes[7], + (2.0f * (modes[0] + modes[4]) + modes[6] - 3.0f * modes[5]) / 6.0f, + modes[8], + modes[9], + (modes[0] + modes[4] - modes[6]) / 3.0f}; +} + /** Calculate hydrodynamic fields in LB units * @param[in] n_a Local node residing in array a for boundary flag - * @param[out] mode Local register values mode + * @param[out] modes Local register values modes * @param[out] d_p_v Local print values * @param[out] d_v Local device values * @param[in] node_f Local node force @@ -1074,111 +1164,26 @@ __device__ void apply_forces(unsigned int index, Utils::Array &mode, * TODO: code duplication with \ref calc_values_from_m */ __device__ void -calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array &mode, +calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array const &modes, LB_rho_v_pi_gpu *d_p_v, LB_rho_v_gpu *d_v, LB_node_force_density_gpu node_f, unsigned int index, unsigned int print_index) { - Utils::Array j{}; - Utils::Array modes_from_pi_eq{}; - Utils::Array pi{}; if (n_a.boundary[index] == 0) { /* Ensure we are working with the current values of d_v */ - - update_rho_v(mode, index, node_f, d_v); + update_rho_v(modes, index, node_f, d_v); d_p_v[print_index].rho = d_v[index].rho; - d_p_v[print_index].v[0] = d_v[index].v[0]; - d_p_v[print_index].v[1] = d_v[index].v[1]; - d_p_v[print_index].v[2] = d_v[index].v[2]; - /* stress calculation */ - float Rho = d_v[index].rho; + d_p_v[print_index].v = d_v[index].v; + auto const modes_tmp = stress_modes(d_v[index], modes); - /* note that d_v[index].v[] already includes the 1/2 f term, accounting - * for the pre- and post-collisional average - */ + d_p_v[print_index].pi = stress_from_stress_modes(modes_tmp); - j[0] = Rho * d_v[index].v[0]; - j[1] = Rho * d_v[index].v[1]; - j[2] = Rho * d_v[index].v[2]; - - // equilibrium part of the stress modes, which comes from - // 
the equality between modes and stress tensor components - - /* m4 = trace(pi) - rho - m5 = pi_xx - pi_yy - m6 = trace(pi) - 3 pi_zz - m7 = pi_xy - m8 = pi_xz - m9 = pi_yz */ - - // and plugging in the Euler stress for the equilibrium: - // pi_eq = rho_0*c_s^2*I3 + (j \otimes j)/rho - // with I3 the 3D identity matrix and - // rho = \trace(rho_0*c_s^2*I3), which yields - - /* m4_from_pi_eq = j.j - m5_from_pi_eq = j_x*j_x - j_y*j_y - m6_from_pi_eq = j.j - 3*j_z*j_z - m7_from_pi_eq = j_x*j_y - m8_from_pi_eq = j_x*j_z - m9_from_pi_eq = j_y*j_z */ - - // where the / Rho term has been dropped. We thus obtain: - - modes_from_pi_eq[0] = (j[0] * j[0] + j[1] * j[1] + j[2] * j[2]) / Rho; - modes_from_pi_eq[1] = (j[0] * j[0] - j[1] * j[1]) / Rho; - modes_from_pi_eq[2] = - (j[0] * j[0] + j[1] * j[1] + j[2] * j[2] - 3.0f * j[2] * j[2]) / Rho; - modes_from_pi_eq[3] = j[0] * j[1] / Rho; - modes_from_pi_eq[4] = j[0] * j[2] / Rho; - modes_from_pi_eq[5] = j[1] * j[2] / Rho; - - /* Now we must predict the outcome of the next collision */ - /* We immediately average pre- and post-collision. */ - /* TODO: need a reference for this. */ - - mode[4] = modes_from_pi_eq[0] + (0.5f + 0.5f * para->gamma_bulk) * - (mode[4] - modes_from_pi_eq[0]); - mode[5] = modes_from_pi_eq[1] + (0.5f + 0.5f * para->gamma_shear) * - (mode[5] - modes_from_pi_eq[1]); - mode[6] = modes_from_pi_eq[2] + (0.5f + 0.5f * para->gamma_shear) * - (mode[6] - modes_from_pi_eq[2]); - mode[7] = modes_from_pi_eq[3] + (0.5f + 0.5f * para->gamma_shear) * - (mode[7] - modes_from_pi_eq[3]); - mode[8] = modes_from_pi_eq[4] + (0.5f + 0.5f * para->gamma_shear) * - (mode[8] - modes_from_pi_eq[4]); - mode[9] = modes_from_pi_eq[5] + (0.5f + 0.5f * para->gamma_shear) * - (mode[9] - modes_from_pi_eq[5]); - - // Transform the stress tensor components according to the modes that - // correspond to those used by U. Schiller. In terms of populations this - // expression then corresponds exactly to those in eq. 
(116)-(121) in - // @cite dunweg07a, when these are written out in populations. - // But to ensure this, the expression in Schiller's modes has to be - // different! - - pi[0] += - (2.0f * (mode[0] + mode[4]) + mode[6] + 3.0f * mode[5]) / 6.0f; // xx - pi[1] += mode[7]; // xy - pi[2] += - (2.0f * (mode[0] + mode[4]) + mode[6] - 3.0f * mode[5]) / 6.0f; // yy - pi[3] += mode[8]; // xz - pi[4] += mode[9]; // yz - pi[5] += (mode[0] + mode[4] - mode[6]) / 3.0f; // zz - - for (int i = 0; i < 6; i++) { - d_p_v[print_index].pi[i] = pi[i]; - } } else { d_p_v[print_index].rho = 0.0f; - - for (auto &val : d_p_v[print_index].v) - val = 0.0f; - - for (auto &val : d_p_v[print_index].pi) - val = 0.0f; + d_p_v[print_index].v = {}; + d_p_v[print_index].pi = {}; } } @@ -1189,76 +1194,20 @@ calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array &mode, * @param[out] j_out Momentum * @param[out] pi_out Pressure tensor */ -__device__ void calc_values_from_m(Utils::Array &mode_single, - LB_rho_v_gpu *d_v_single, float *rho_out, - float *j_out, float *pi_out) { - Utils::Array modes_from_pi_eq{}; - Utils::Array j{}; - float Rho; - - // stress calculation - - // Set the rho output value - - Rho = d_v_single->rho; - *rho_out = d_v_single->rho; - - // note that d_v_single->v[] already includes the 1/2 f term, - // accounting for the pre- and post-collisional average - - j[0] = Rho * d_v_single->v[0]; - j[1] = Rho * d_v_single->v[1]; - j[2] = Rho * d_v_single->v[2]; - - j_out[3] = j[0]; - j_out[3] = j[1]; - j_out[3] = j[2]; - - // equilibrium part of the stress modes, which comes from - // the equality between modes and stress tensor components - - modes_from_pi_eq[0] = (j[0] * j[0] + j[1] * j[1] + j[2] * j[2]) / Rho; - modes_from_pi_eq[1] = (j[0] * j[0] - j[1] * j[1]) / Rho; - modes_from_pi_eq[2] = - (j[0] * j[0] + j[1] * j[1] + j[2] * j[2] - 3.0f * j[2] * j[2]) / Rho; - modes_from_pi_eq[3] = j[0] * j[1] / Rho; - modes_from_pi_eq[4] = j[0] * j[2] / Rho; - modes_from_pi_eq[5] = j[1] * j[2] / 
Rho; +__device__ void calc_values_from_m(Utils::Array const &mode_single, + LB_rho_v_gpu const &d_v_single, + float *rho_out, float *j_out, + Utils::Array &pi_out) { + *rho_out = d_v_single.rho; + float Rho = d_v_single.rho; + j_out[0] = Rho * d_v_single.v[0]; + j_out[1] = Rho * d_v_single.v[1]; + j_out[2] = Rho * d_v_single.v[2]; // Now we must predict the outcome of the next collision // We immediately average pre- and post-collision. - - mode_single[4] = - modes_from_pi_eq[0] + - (0.5f + 0.5f * para->gamma_bulk) * (mode_single[4] - modes_from_pi_eq[0]); - mode_single[5] = - modes_from_pi_eq[1] + (0.5f + 0.5f * para->gamma_shear) * - (mode_single[5] - modes_from_pi_eq[1]); - mode_single[6] = - modes_from_pi_eq[2] + (0.5f + 0.5f * para->gamma_shear) * - (mode_single[6] - modes_from_pi_eq[2]); - mode_single[7] = - modes_from_pi_eq[3] + (0.5f + 0.5f * para->gamma_shear) * - (mode_single[7] - modes_from_pi_eq[3]); - mode_single[8] = - modes_from_pi_eq[4] + (0.5f + 0.5f * para->gamma_shear) * - (mode_single[8] - modes_from_pi_eq[4]); - mode_single[9] = - modes_from_pi_eq[5] + (0.5f + 0.5f * para->gamma_shear) * - (mode_single[9] - modes_from_pi_eq[5]); - // Transform the stress tensor components according to the mode_singles. 
- - pi_out[0] = (2.0f * (mode_single[0] + mode_single[4]) + mode_single[6] + - 3.0f * mode_single[5]) / - 6.0f; // xx - pi_out[1] = mode_single[7]; // xy - pi_out[2] = (2.0f * (mode_single[0] + mode_single[4]) + mode_single[6] - - 3.0f * mode_single[5]) / - 6.0f; // yy - pi_out[3] = mode_single[8]; // xz - pi_out[4] = mode_single[9]; // yz - pi_out[5] = (mode_single[0] + mode_single[4] - mode_single[6]) / 3.0f; // zz + pi_out = stress_from_stress_modes(stress_modes(d_v_single, mode_single)); } /** @@ -1269,59 +1218,59 @@ __device__ void calc_values_from_m(Utils::Array &mode_single, __device__ void calc_mode(Utils::Array &mode, LB_nodes_gpu n_a, unsigned int node_index) { /* mass mode */ - mode[0] = n_a.vd[0 * para->number_of_nodes + node_index] + - n_a.vd[1 * para->number_of_nodes + node_index] + - n_a.vd[2 * para->number_of_nodes + node_index] + - n_a.vd[3 * para->number_of_nodes + node_index] + - n_a.vd[4 * para->number_of_nodes + node_index] + - n_a.vd[5 * para->number_of_nodes + node_index] + - n_a.vd[6 * para->number_of_nodes + node_index] + - n_a.vd[7 * para->number_of_nodes + node_index] + - n_a.vd[8 * para->number_of_nodes + node_index] + - n_a.vd[9 * para->number_of_nodes + node_index] + - n_a.vd[10 * para->number_of_nodes + node_index] + - n_a.vd[11 * para->number_of_nodes + node_index] + - n_a.vd[12 * para->number_of_nodes + node_index] + - n_a.vd[13 * para->number_of_nodes + node_index] + - n_a.vd[14 * para->number_of_nodes + node_index] + - n_a.vd[15 * para->number_of_nodes + node_index] + - n_a.vd[16 * para->number_of_nodes + node_index] + - n_a.vd[17 * para->number_of_nodes + node_index] + - n_a.vd[18 * para->number_of_nodes + node_index]; + mode[0] = n_a.populations[0 * para->number_of_nodes + node_index] + + n_a.populations[1 * para->number_of_nodes + node_index] + + n_a.populations[2 * para->number_of_nodes + node_index] + + n_a.populations[3 * para->number_of_nodes + node_index] + + n_a.populations[4 * para->number_of_nodes + node_index] + + 
n_a.populations[5 * para->number_of_nodes + node_index] + + n_a.populations[6 * para->number_of_nodes + node_index] + + n_a.populations[7 * para->number_of_nodes + node_index] + + n_a.populations[8 * para->number_of_nodes + node_index] + + n_a.populations[9 * para->number_of_nodes + node_index] + + n_a.populations[10 * para->number_of_nodes + node_index] + + n_a.populations[11 * para->number_of_nodes + node_index] + + n_a.populations[12 * para->number_of_nodes + node_index] + + n_a.populations[13 * para->number_of_nodes + node_index] + + n_a.populations[14 * para->number_of_nodes + node_index] + + n_a.populations[15 * para->number_of_nodes + node_index] + + n_a.populations[16 * para->number_of_nodes + node_index] + + n_a.populations[17 * para->number_of_nodes + node_index] + + n_a.populations[18 * para->number_of_nodes + node_index]; /* momentum modes */ - mode[1] = (n_a.vd[1 * para->number_of_nodes + node_index] - - n_a.vd[2 * para->number_of_nodes + node_index]) + - (n_a.vd[7 * para->number_of_nodes + node_index] - - n_a.vd[8 * para->number_of_nodes + node_index]) + - (n_a.vd[9 * para->number_of_nodes + node_index] - - n_a.vd[10 * para->number_of_nodes + node_index]) + - (n_a.vd[11 * para->number_of_nodes + node_index] - - n_a.vd[12 * para->number_of_nodes + node_index]) + - (n_a.vd[13 * para->number_of_nodes + node_index] - - n_a.vd[14 * para->number_of_nodes + node_index]); - - mode[2] = (n_a.vd[3 * para->number_of_nodes + node_index] - - n_a.vd[4 * para->number_of_nodes + node_index]) + - (n_a.vd[7 * para->number_of_nodes + node_index] - - n_a.vd[8 * para->number_of_nodes + node_index]) - - (n_a.vd[9 * para->number_of_nodes + node_index] - - n_a.vd[10 * para->number_of_nodes + node_index]) + - (n_a.vd[15 * para->number_of_nodes + node_index] - - n_a.vd[16 * para->number_of_nodes + node_index]) + - (n_a.vd[17 * para->number_of_nodes + node_index] - - n_a.vd[18 * para->number_of_nodes + node_index]); - - mode[3] = (n_a.vd[5 * para->number_of_nodes + node_index] 
- - n_a.vd[6 * para->number_of_nodes + node_index]) + - (n_a.vd[11 * para->number_of_nodes + node_index] - - n_a.vd[12 * para->number_of_nodes + node_index]) - - (n_a.vd[13 * para->number_of_nodes + node_index] - - n_a.vd[14 * para->number_of_nodes + node_index]) + - (n_a.vd[15 * para->number_of_nodes + node_index] - - n_a.vd[16 * para->number_of_nodes + node_index]) - - (n_a.vd[17 * para->number_of_nodes + node_index] - - n_a.vd[18 * para->number_of_nodes + node_index]); + mode[1] = (n_a.populations[1 * para->number_of_nodes + node_index] - + n_a.populations[2 * para->number_of_nodes + node_index]) + + (n_a.populations[7 * para->number_of_nodes + node_index] - + n_a.populations[8 * para->number_of_nodes + node_index]) + + (n_a.populations[9 * para->number_of_nodes + node_index] - + n_a.populations[10 * para->number_of_nodes + node_index]) + + (n_a.populations[11 * para->number_of_nodes + node_index] - + n_a.populations[12 * para->number_of_nodes + node_index]) + + (n_a.populations[13 * para->number_of_nodes + node_index] - + n_a.populations[14 * para->number_of_nodes + node_index]); + + mode[2] = (n_a.populations[3 * para->number_of_nodes + node_index] - + n_a.populations[4 * para->number_of_nodes + node_index]) + + (n_a.populations[7 * para->number_of_nodes + node_index] - + n_a.populations[8 * para->number_of_nodes + node_index]) - + (n_a.populations[9 * para->number_of_nodes + node_index] - + n_a.populations[10 * para->number_of_nodes + node_index]) + + (n_a.populations[15 * para->number_of_nodes + node_index] - + n_a.populations[16 * para->number_of_nodes + node_index]) + + (n_a.populations[17 * para->number_of_nodes + node_index] - + n_a.populations[18 * para->number_of_nodes + node_index]); + + mode[3] = (n_a.populations[5 * para->number_of_nodes + node_index] - + n_a.populations[6 * para->number_of_nodes + node_index]) + + (n_a.populations[11 * para->number_of_nodes + node_index] - + n_a.populations[12 * para->number_of_nodes + node_index]) - + 
(n_a.populations[13 * para->number_of_nodes + node_index] - + n_a.populations[14 * para->number_of_nodes + node_index]) + + (n_a.populations[15 * para->number_of_nodes + node_index] - + n_a.populations[16 * para->number_of_nodes + node_index]) - + (n_a.populations[17 * para->number_of_nodes + node_index] - + n_a.populations[18 * para->number_of_nodes + node_index]); } /** Calculate temperature of the fluid kernel @@ -1368,10 +1317,9 @@ __device__ __inline__ float three_point_polynomial_larger_than_half(float u) { /** * @brief Get velocity of at index. - * */ __device__ __inline__ float3 node_velocity(float rho_eq, LB_nodes_gpu n_a, - int index) { + unsigned index) { auto const boundary_index = n_a.boundary[index]; if (boundary_index) { @@ -1394,7 +1342,7 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, Utils::Array center_node_index{}; Utils::Array temp_delta{}; - for (int i = 0; i < 3; ++i) { + for (unsigned i = 0; i < 3; ++i) { // position of particle in units of agrid. auto const scaled_pos = particle_position[i] / para->agrid - 0.5f; center_node_index[i] = static_cast(rint(scaled_pos)); @@ -1431,7 +1379,7 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, return ind; }; - int cnt = 0; + unsigned cnt = 0; float3 interpolated_u{0.0f, 0.0f, 0.0f}; #pragma unroll 1 for (int i = 0; i < 3; ++i) { @@ -1446,7 +1394,7 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, auto const z = fold_if_necessary(center_node_index[2] - 1 + k, static_cast(para->dim_z)); delta[cnt] = temp_delta[i].x * temp_delta[j].y * temp_delta[k].z; - auto const index = xyz_to_index(x, y, z); + auto const index = static_cast(xyz_to_index(x, y, z)); node_indices[cnt] = index; auto const node_u = node_velocity(para->rho, n_a, index); @@ -1477,7 +1425,7 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, Utils::Array temp_delta; // Eq. 
(10) and (11) in @cite ahlrichs99a page 8227 #pragma unroll - for (int i = 0; i < 3; ++i) { + for (unsigned i = 0; i < 3; ++i) { auto const scaledpos = particle_position[i] / para->agrid - 0.5f; left_node_index[i] = static_cast(floorf(scaledpos)); temp_delta[3 + i] = scaledpos - static_cast(left_node_index[i]); @@ -1501,26 +1449,23 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, static_cast(para->dim_y); int const z = (left_node_index[2] + static_cast(para->dim_z)) % static_cast(para->dim_z); - auto xp1 = x + 1; - auto yp1 = y + 1; - auto zp1 = z + 1; auto fold_if_necessary = [](int ind, int dim) { return ind >= dim ? ind % dim : ind; }; - xp1 = fold_if_necessary(xp1, static_cast(para->dim_x)); - yp1 = fold_if_necessary(yp1, static_cast(para->dim_y)); - zp1 = fold_if_necessary(zp1, static_cast(para->dim_z)); - node_index[0] = xyz_to_index(x, y, z); - node_index[1] = xyz_to_index(xp1, y, z); - node_index[2] = xyz_to_index(x, yp1, z); - node_index[3] = xyz_to_index(xp1, yp1, z); - node_index[4] = xyz_to_index(x, y, zp1); - node_index[5] = xyz_to_index(xp1, y, zp1); - node_index[6] = xyz_to_index(x, yp1, zp1); - node_index[7] = xyz_to_index(xp1, yp1, zp1); + auto const xp1 = fold_if_necessary(x + 1, static_cast(para->dim_x)); + auto const yp1 = fold_if_necessary(y + 1, static_cast(para->dim_y)); + auto const zp1 = fold_if_necessary(z + 1, static_cast(para->dim_z)); + node_index[0] = static_cast(xyz_to_index(x, y, z)); + node_index[1] = static_cast(xyz_to_index(xp1, y, z)); + node_index[2] = static_cast(xyz_to_index(x, yp1, z)); + node_index[3] = static_cast(xyz_to_index(xp1, yp1, z)); + node_index[4] = static_cast(xyz_to_index(x, y, zp1)); + node_index[5] = static_cast(xyz_to_index(xp1, y, zp1)); + node_index[6] = static_cast(xyz_to_index(x, yp1, zp1)); + node_index[7] = static_cast(xyz_to_index(xp1, yp1, zp1)); float3 interpolated_u{0.0f, 0.0f, 0.0f}; - for (int i = 0; i < 8; ++i) { + for (unsigned i = 0; i < 8; ++i) { auto const node_u = 
node_velocity(para->rho, n_a, node_index[i]); interpolated_u.x += delta[i] * node_u.x; interpolated_u.y += delta[i] * node_u.y; @@ -1541,6 +1486,7 @@ velocity_interpolation(LB_nodes_gpu n_a, float const *particle_position, * @param[in] d_v Local device values * @param[in] flag_cs Determine if we are at the centre (0, * typical) or at the source (1, swimmer only) + * @param[in] philox_counter Philox counter * @param[in] friction Friction constant for the particle coupling * @tparam no_of_neighbours The number of neighbours to consider for * interpolation @@ -1635,7 +1581,8 @@ __device__ void calc_viscous_force( if (para->kT > 0.0) { /* add stochastic force of zero mean (eq. (15) @cite ahlrichs99a) */ float4 random_floats = random_wrapper_philox( - particle_data[part_index].identity, LBQ * 32, philox_counter); + static_cast(particle_data[part_index].identity), LBQ * 32, + philox_counter); /* lb_coupl_pref is stored in MD units (force). * Eq. (16) @cite ahlrichs99a. * The factor 12 comes from the fact that we use random numbers @@ -1697,8 +1644,8 @@ calc_node_force(Utils::Array const &delta, float const *delta_j, Utils::Array const &node_index, LB_node_force_density_gpu node_f) { - for (int node = 0; node < no_of_neighbours; ++node) { - for (int i = 0; i < 3; ++i) { + for (std::size_t node = 0; node < no_of_neighbours; ++node) { + for (unsigned i = 0; i < 3; ++i) { atomicAdd( &(node_f.force_density[i * para->number_of_nodes + node_index[node]]), delta[node] * delta_j[i]); @@ -1723,7 +1670,7 @@ calc_node_force(Utils::Array const &delta, */ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, LB_node_force_density_gpu node_f, - int *gpu_check) { + bool *gpu_check) { /* TODO: this can handle only a uniform density, something similar, but local, has to be called every time the fields are set by the user ! 
*/ unsigned int index = blockIdx.y * gridDim.x * blockDim.x + @@ -1731,9 +1678,9 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, if (index < para->number_of_nodes) { Utils::Array mode; - /* default values for fields in lattice units */ - gpu_check[0] = 1; + gpu_check[0] = true; + /* default values for fields in lattice units */ float Rho = para->rho; Utils::Array v{}; Utils::Array pi = {{Rho * D3Q19::c_sound_sq, 0.0f, @@ -1765,28 +1712,28 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, float tmp1, tmp2; /* update the q=0 sublattice */ - n_a.vd[(0) * para->number_of_nodes + index] = + n_a.populations[(0) * para->number_of_nodes + index] = 1.0f / 3.0f * (local_rho - avg_rho) - 1.0f / 2.0f * trace; /* update the q=1 sublattice */ rho_times_coeff = 1.0f / 18.0f * (local_rho - avg_rho); - n_a.vd[(1) * para->number_of_nodes + index] = + n_a.populations[(1) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] - 1.0f / 12.0f * trace; - n_a.vd[(2) * para->number_of_nodes + index] = + n_a.populations[(2) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] - 1.0f / 12.0f * trace; - n_a.vd[(3) * para->number_of_nodes + index] = + n_a.populations[(3) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] - 1.0f / 12.0f * trace; - n_a.vd[(4) * para->number_of_nodes + index] = + n_a.populations[(4) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] - 1.0f / 12.0f * trace; - n_a.vd[(5) * para->number_of_nodes + index] = + n_a.populations[(5) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] - 1.0f / 12.0f * trace; - n_a.vd[(6) * para->number_of_nodes + index] = + n_a.populations[(6) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 6.0f * local_j[2] 
+ 1.0f / 4.0f * local_pi[5] - 1.0f / 12.0f * trace; @@ -1795,48 +1742,48 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, tmp1 = local_pi[0] + local_pi[2]; tmp2 = 2.0f * local_pi[1]; - n_a.vd[(7) * para->number_of_nodes + index] = + n_a.populations[(7) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[1]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(8) * para->number_of_nodes + index] = + n_a.populations[(8) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[1]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(9) * para->number_of_nodes + index] = + n_a.populations[(9) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[1]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(10) * para->number_of_nodes + index] = + n_a.populations[(10) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[1]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; tmp1 = local_pi[0] + local_pi[5]; tmp2 = 2.0f * local_pi[3]; - n_a.vd[(11) * para->number_of_nodes + index] = + n_a.populations[(11) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(12) * para->number_of_nodes + index] = + n_a.populations[(12) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(13) * para->number_of_nodes + index] = + n_a.populations[(13) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(14) * para->number_of_nodes + index] = + n_a.populations[(14) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[2]) + 1.0f / 8.0f * (tmp1 - 
tmp2) - 1.0f / 24.0f * trace; tmp1 = local_pi[2] + local_pi[5]; tmp2 = 2.0f * local_pi[4]; - n_a.vd[(15) * para->number_of_nodes + index] = + n_a.populations[(15) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 12.0f * (local_j[1] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(16) * para->number_of_nodes + index] = + n_a.populations[(16) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 12.0f * (local_j[1] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(17) * para->number_of_nodes + index] = + n_a.populations[(17) * para->number_of_nodes + index] = rho_times_coeff + 1.0f / 12.0f * (local_j[1] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(18) * para->number_of_nodes + index] = + n_a.populations[(18) * para->number_of_nodes + index] = rho_times_coeff - 1.0f / 12.0f * (local_j[1] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; @@ -1845,7 +1792,7 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, } } -__global__ void set_force_density(int single_nodeindex, +__global__ void set_force_density(unsigned single_nodeindex, float const *force_density, LB_node_force_density_gpu node_f) { unsigned int index = blockIdx.y * gridDim.x * blockDim.x + @@ -1874,7 +1821,7 @@ __global__ void set_force_density(int single_nodeindex, * @param[out] d_v Local device values * @param[in] node_f Node forces */ -__global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex, +__global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, unsigned single_nodeindex, float const *velocity, LB_rho_v_gpu *d_v, LB_node_force_density_gpu node_f) { unsigned int index = blockIdx.y * gridDim.x * blockDim.x + @@ -1891,7 +1838,7 @@ __global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex, Utils::Array mode_for_pi; float rho_from_m; float j_from_m[3]; - float pi_from_m[6]; + Utils::Array pi_from_m; // Calculate the modes for 
this node @@ -1904,7 +1851,7 @@ __global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex, // Calculate the density, velocity, and pressure tensor // in LB unit for this node - calc_values_from_m(mode_for_pi, &d_v[single_nodeindex], &rho_from_m, + calc_values_from_m(mode_for_pi, d_v[single_nodeindex], &rho_from_m, j_from_m, pi_from_m); // Take LB component density and calculate the equilibrium part @@ -1929,29 +1876,29 @@ __global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex, // update the q=0 sublattice - n_a.vd[(0) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(0) * para->number_of_nodes + single_nodeindex] = 1.0f / 3.0f * (local_rho - avg_rho) - 1.0f / 2.0f * trace; // update the q=1 sublattice rho_times_coeff = 1.0f / 18.0f * (local_rho - avg_rho); - n_a.vd[(1) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(1) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] - 1.0f / 12.0f * trace; - n_a.vd[(2) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(2) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 6.0f * local_j[0] + 1.0f / 4.0f * local_pi[0] - 1.0f / 12.0f * trace; - n_a.vd[(3) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(3) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] - 1.0f / 12.0f * trace; - n_a.vd[(4) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(4) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 6.0f * local_j[1] + 1.0f / 4.0f * local_pi[2] - 1.0f / 12.0f * trace; - n_a.vd[(5) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(5) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] - 1.0f / 12.0f * trace; - n_a.vd[(6) * para->number_of_nodes + single_nodeindex] = + 
n_a.populations[(6) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 6.0f * local_j[2] + 1.0f / 4.0f * local_pi[5] - 1.0f / 12.0f * trace; @@ -1962,48 +1909,48 @@ __global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex, tmp1 = local_pi[0] + local_pi[2]; tmp2 = 2.0f * local_pi[1]; - n_a.vd[(7) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(7) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[1]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(8) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(8) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[1]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(9) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(9) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] - local_j[1]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(10) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(10) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[1]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; tmp1 = local_pi[0] + local_pi[5]; tmp2 = 2.0f * local_pi[3]; - n_a.vd[(11) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(11) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 12.0f * (local_j[0] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(12) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(12) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(13) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(13) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 12.0f * 
(local_j[0] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(14) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(14) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 12.0f * (local_j[0] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; tmp1 = local_pi[2] + local_pi[5]; tmp2 = 2.0f * local_pi[4]; - n_a.vd[(15) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(15) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 12.0f * (local_j[1] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(16) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(16) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 12.0f * (local_j[1] + local_j[2]) + 1.0f / 8.0f * (tmp1 + tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(17) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(17) * para->number_of_nodes + single_nodeindex] = rho_times_coeff + 1.0f / 12.0f * (local_j[1] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; - n_a.vd[(18) * para->number_of_nodes + single_nodeindex] = + n_a.populations[(18) * para->number_of_nodes + single_nodeindex] = rho_times_coeff - 1.0f / 12.0f * (local_j[1] - local_j[2]) + 1.0f / 8.0f * (tmp1 - tmp2) - 1.0f / 24.0f * trace; @@ -2060,7 +2007,7 @@ __global__ void reinit_node_force(LB_node_force_density_gpu node_f) { * @param[in] d_v Local modes */ __global__ void set_rho(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, - int single_nodeindex, float rho) { + unsigned single_nodeindex, float rho) { unsigned int index = blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x + threadIdx.x; /* Note: this sets the velocities to zero */ @@ -2071,43 +2018,43 @@ __global__ void set_rho(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, local_rho = (rho - para->rho); d_v[single_nodeindex].rho = rho; - n_a.vd[0 * para->number_of_nodes + single_nodeindex] = + n_a.populations[0 * 
para->number_of_nodes + single_nodeindex] = 1.0f / 3.0f * local_rho; - n_a.vd[1 * para->number_of_nodes + single_nodeindex] = + n_a.populations[1 * para->number_of_nodes + single_nodeindex] = 1.0f / 18.0f * local_rho; - n_a.vd[2 * para->number_of_nodes + single_nodeindex] = + n_a.populations[2 * para->number_of_nodes + single_nodeindex] = 1.0f / 18.0f * local_rho; - n_a.vd[3 * para->number_of_nodes + single_nodeindex] = + n_a.populations[3 * para->number_of_nodes + single_nodeindex] = 1.0f / 18.0f * local_rho; - n_a.vd[4 * para->number_of_nodes + single_nodeindex] = + n_a.populations[4 * para->number_of_nodes + single_nodeindex] = 1.0f / 18.0f * local_rho; - n_a.vd[5 * para->number_of_nodes + single_nodeindex] = + n_a.populations[5 * para->number_of_nodes + single_nodeindex] = 1.0f / 18.0f * local_rho; - n_a.vd[6 * para->number_of_nodes + single_nodeindex] = + n_a.populations[6 * para->number_of_nodes + single_nodeindex] = 1.0f / 18.0f * local_rho; - n_a.vd[7 * para->number_of_nodes + single_nodeindex] = + n_a.populations[7 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[8 * para->number_of_nodes + single_nodeindex] = + n_a.populations[8 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[9 * para->number_of_nodes + single_nodeindex] = + n_a.populations[9 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[10 * para->number_of_nodes + single_nodeindex] = + n_a.populations[10 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[11 * para->number_of_nodes + single_nodeindex] = + n_a.populations[11 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[12 * para->number_of_nodes + single_nodeindex] = + n_a.populations[12 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[13 * para->number_of_nodes + single_nodeindex] = + n_a.populations[13 * para->number_of_nodes + single_nodeindex] = 1.0f / 
36.0f * local_rho; - n_a.vd[14 * para->number_of_nodes + single_nodeindex] = + n_a.populations[14 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[15 * para->number_of_nodes + single_nodeindex] = + n_a.populations[15 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[16 * para->number_of_nodes + single_nodeindex] = + n_a.populations[16 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[17 * para->number_of_nodes + single_nodeindex] = + n_a.populations[17 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; - n_a.vd[18 * para->number_of_nodes + single_nodeindex] = + n_a.populations[18 * para->number_of_nodes + single_nodeindex] = 1.0f / 36.0f * local_rho; } } @@ -2115,12 +2062,14 @@ __global__ void set_rho(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, /** Set the boundary flag for all boundary nodes * @param[in] boundary_node_list Indices of the boundary nodes * @param[in] boundary_index_list Flag for the corresponding boundary + * @param[in] boundary_velocities Boundary velocities * @param[in] number_of_boundnodes Number of boundary nodes + * @param[in] boundaries Boundary information */ __global__ void init_boundaries(int const *boundary_node_list, int const *boundary_index_list, float const *boundary_velocities, - int number_of_boundnodes, + unsigned number_of_boundnodes, LB_boundaries_gpu boundaries) { unsigned int index = blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x + threadIdx.x; @@ -2134,7 +2083,7 @@ __global__ void init_boundaries(int const *boundary_node_list, boundary_velocities[3 * (boundary_index - 1) + 1], boundary_velocities[3 * (boundary_index - 1) + 2]}; - boundaries.index[node_index] = boundary_index; + boundaries.index[node_index] = static_cast(boundary_index); boundaries.velocity[node_index] = v; } } @@ -2153,10 +2102,11 @@ __global__ void reset_boundaries(LB_boundaries_gpu boundaries) { * @param[out] n_b Local node residing in 
array b * @param[in,out] d_v Local device values * @param[in,out] node_f Local node force density + * @param[in] philox_counter Philox counter */ __global__ void integrate(LB_nodes_gpu n_a, LB_nodes_gpu n_b, LB_rho_v_gpu *d_v, LB_node_force_density_gpu node_f, - unsigned int philox_counter) { + uint64_t philox_counter) { /* every node is connected to a thread via the index */ unsigned int index = blockIdx.y * gridDim.x * blockDim.x + blockDim.x * blockIdx.x + threadIdx.x; @@ -2202,9 +2152,11 @@ __global__ void integrate(LB_nodes_gpu n_a, LB_nodes_gpu n_b, LB_rho_v_gpu *d_v, * @param[in,out] particle_force Particle force * @param[out] node_f Local node force * @param[in] d_v Local device values + * @param[in] couple_virtual If true, virtual particles are also coupled * @param[in] friction Friction constant for the particle coupling + * @param[in] philox_counter Philox counter * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation + * interpolation */ template __global__ void calc_fluid_particle_ia( @@ -2299,8 +2251,9 @@ __global__ void lb_get_boundaries(LB_nodes_gpu n_a, * @param[out] d_v Local device values * @param[in] node_f Local node force */ -__global__ void lb_print_node(int single_nodeindex, LB_rho_v_pi_gpu *d_p_v, - LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, +__global__ void lb_print_node(unsigned int single_nodeindex, + LB_rho_v_pi_gpu *d_p_v, LB_nodes_gpu n_a, + LB_rho_v_gpu *d_v, LB_node_force_density_gpu node_f) { Utils::Array mode; unsigned int index = blockIdx.y * gridDim.x * blockDim.x + @@ -2348,7 +2301,7 @@ __global__ void momentum(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v, * @param[out] device_flag Result * @param[in] n_a Local node residing in array a */ -__global__ void lb_get_boundary_flag(int single_nodeindex, +__global__ void lb_get_boundary_flag(unsigned int single_nodeindex, unsigned int *device_flag, LB_nodes_gpu n_a) { unsigned int index = blockIdx.y * gridDim.x * blockDim.x + @@ -2362,33 +2315,28 @@ __global__ void 
lb_get_boundary_flag(int single_nodeindex, /* Host functions to setup and call kernels*/ /**********************************************************************/ -void lb_get_para_pointer(LB_parameters_gpu **pointeradress) { - if (cudaGetSymbolAddress((void **)pointeradress, para) != cudaSuccess) { - fprintf(stderr, - "Trouble getting address of LB parameters.\n"); // TODO give proper - // error message +void lb_get_para_pointer(LB_parameters_gpu **pointer_address) { + auto const error = cudaGetSymbolAddress((void **)pointer_address, para); + if (error != cudaSuccess) { + fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error)); errexit(); } } -void lb_get_lbpar_pointer(LB_parameters_gpu **pointeradress) { - *pointeradress = &lbpar_gpu; -} - -void lb_get_boundary_force_pointer(float **pointeradress) { +void lb_get_boundary_force_pointer(float **pointer_address) { #ifdef LB_BOUNDARIES_GPU - *pointeradress = lb_boundary_force; + *pointer_address = lb_boundary_force; #endif } -void lb_get_device_values_pointer(LB_rho_v_gpu **pointeradress) { - *pointeradress = device_rho_v; +void lb_get_device_values_pointer(LB_rho_v_gpu **pointer_address) { + *pointer_address = device_rho_v; } /** Initialization for the lb gpu fluid called from host * @param lbpar_gpu Pointer to parameters to setup the lb field */ -void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { +void lb_init_GPU(const LB_parameters_gpu &lbpar_gpu) { #define free_realloc_and_clear(var, size) \ { \ if ((var) != nullptr) \ @@ -2397,8 +2345,8 @@ void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { cudaMemset(var, 0, size); \ } - size_of_rho_v = lbpar_gpu->number_of_nodes * sizeof(LB_rho_v_gpu); - size_of_rho_v_pi = lbpar_gpu->number_of_nodes * sizeof(LB_rho_v_pi_gpu); + size_of_rho_v = lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_gpu); + size_of_rho_v_pi = lbpar_gpu.number_of_nodes * sizeof(LB_rho_v_pi_gpu); /* Allocate structs in device memory*/ free_realloc_and_clear(device_rho_v, size_of_rho_v); @@ -2406,61 
+2354,51 @@ void lb_init_GPU(LB_parameters_gpu *lbpar_gpu) { /* TODO: this is almost a copy of device_rho_v; think about eliminating * it, and maybe pi can be added to device_rho_v in this case */ free_realloc_and_clear(print_rho_v_pi, size_of_rho_v_pi); - free_realloc_and_clear(nodes_a.vd, - lbpar_gpu->number_of_nodes * 19 * sizeof(float)); - free_realloc_and_clear(nodes_b.vd, - lbpar_gpu->number_of_nodes * 19 * sizeof(float)); + free_realloc_and_clear(nodes_a.populations, + lbpar_gpu.number_of_nodes * 19 * sizeof(float)); + free_realloc_and_clear(nodes_b.populations, + lbpar_gpu.number_of_nodes * 19 * sizeof(float)); free_realloc_and_clear(node_f.force_density, - lbpar_gpu->number_of_nodes * 3 * sizeof(lbForceFloat)); + lbpar_gpu.number_of_nodes * 3 * sizeof(lbForceFloat)); #if defined(VIRTUAL_SITES_INERTIALESS_TRACERS) || defined(EK_DEBUG) free_realloc_and_clear(node_f.force_density_buf, - lbpar_gpu->number_of_nodes * 3 * sizeof(lbForceFloat)); + lbpar_gpu.number_of_nodes * 3 * sizeof(lbForceFloat)); #endif free_realloc_and_clear(boundaries.index, - lbpar_gpu->number_of_nodes * sizeof(unsigned int)); + lbpar_gpu.number_of_nodes * sizeof(unsigned int)); free_realloc_and_clear(boundaries.velocity, - lbpar_gpu->number_of_nodes * + lbpar_gpu.number_of_nodes * sizeof(Utils::Array)); nodes_a.boundary = nodes_b.boundary = boundaries.index; nodes_a.boundary_velocity = nodes_b.boundary_velocity = boundaries.velocity; - /*write parameters in const memory*/ - cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - - /*check flag if lb gpu init works*/ - free_realloc_and_clear(gpu_check, sizeof(int)); + /* write parameters in const memory */ + cuda_safe_mem( + cudaMemcpyToSymbol(para, &lbpar_gpu, sizeof(LB_parameters_gpu))); - if (h_gpu_check != nullptr) - free(h_gpu_check); + free_realloc_and_clear(device_gpu_lb_initialized, sizeof(bool)); - h_gpu_check = (int *)Utils::malloc(sizeof(int)); - - /* values for the kernel call */ - int 
threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu->number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(reset_boundaries, dim_grid, threads_per_block, boundaries); - /* calc of velocitydensities from given parameters and initialize the + /* calc of velocity densities from given parameters and initialize the * Node_Force array with zero */ KERNELCALL(reinit_node_force, dim_grid, threads_per_block, (node_f)); KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a, - device_rho_v, node_f, gpu_check); + device_rho_v, node_f, device_gpu_lb_initialized); intflag = true; current_nodes = &nodes_a; - h_gpu_check[0] = 0; - cuda_safe_mem( - cudaMemcpy(h_gpu_check, gpu_check, sizeof(int), cudaMemcpyDeviceToHost)); + bool host_gpu_lb_initialized = false; + cuda_safe_mem(cudaMemcpy(&host_gpu_lb_initialized, device_gpu_lb_initialized, + sizeof(bool), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); - if (!h_gpu_check[0]) { - fprintf(stderr, "initialization of lb gpu code failed! 
\n"); + if (!host_gpu_lb_initialized) { + fprintf(stderr, "initialization of LB GPU code failed!\n"); errexit(); } } @@ -2472,18 +2410,13 @@ void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu) { /* write parameters in const memory */ cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu->number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu->number_of_nodes, 4, threads_per_block); /* calc of velocity densities from given parameters and initialize the * Node_Force array with zero */ KERNELCALL(calc_n_from_rho_j_pi, dim_grid, threads_per_block, nodes_a, - device_rho_v, node_f, gpu_check); + device_rho_v, node_f, device_gpu_lb_initialized); } #ifdef LB_BOUNDARIES_GPU @@ -2496,7 +2429,8 @@ void lb_reinit_GPU(LB_parameters_gpu *lbpar_gpu) { * @param host_lb_boundary_velocity The constant velocity at the boundary, * set by the user */ -void lb_init_boundaries_GPU(int host_n_lb_boundaries, int number_of_boundnodes, +void lb_init_boundaries_GPU(std::size_t host_n_lb_boundaries, + unsigned number_of_boundnodes, int *host_boundary_node_list, int *host_boundary_index_list, float *host_lb_boundary_velocity) { @@ -2524,12 +2458,8 @@ void lb_init_boundaries_GPU(int host_n_lb_boundaries, int number_of_boundnodes, cudaMemcpyHostToDevice)); /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); 
KERNELCALL(reset_boundaries, dim_grid, threads_per_block, boundaries); @@ -2542,16 +2472,10 @@ void lb_init_boundaries_GPU(int host_n_lb_boundaries, int number_of_boundnodes, fprintf(stderr, "WARNING: boundary cmd executed but no boundary node found!\n"); } else { - int threads_per_block_bound = 64; - int blocks_per_grid_bound_y = 4; - int blocks_per_grid_bound_x = - (number_of_boundnodes + - threads_per_block_bound * blocks_per_grid_bound_y - 1) / - (threads_per_block_bound * blocks_per_grid_bound_y); dim3 dim_grid_bound = - make_uint3(blocks_per_grid_bound_x, blocks_per_grid_bound_y, 1); + calculate_dim_grid(number_of_boundnodes, 4, threads_per_block); - KERNELCALL(init_boundaries, dim_grid_bound, threads_per_block_bound, + KERNELCALL(init_boundaries, dim_grid_bound, threads_per_block, boundary_node_list, boundary_index_list, boundary_velocity, number_of_boundnodes, boundaries); } @@ -2569,50 +2493,39 @@ void lb_init_boundaries_GPU(int host_n_lb_boundaries, int number_of_boundnodes, void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu) { cuda_safe_mem(cudaMemcpyToSymbol(para, lbpar_gpu, sizeof(LB_parameters_gpu))); - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu->number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu->number_of_nodes, 4, threads_per_block); KERNELCALL(reinit_node_force, dim_grid, threads_per_block, node_f); } /** Setup and call particle kernel from the host * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation + * interpolation */ template void lb_calc_particle_lattice_ia_gpu(bool couple_virtual, double friction) { auto device_particles = gpu_get_particle_pointer(); - if (not device_particles.empty()) { - /* call of the particle kernel 
*/ - /* values for the particle kernel */ - int threads_per_block_particles = 64; - int blocks_per_grid_particles_y = 4; - auto blocks_per_grid_particles_x = static_cast( - (device_particles.size() + - threads_per_block_particles * blocks_per_grid_particles_y - 1) / - (threads_per_block_particles * blocks_per_grid_particles_y)); - dim3 dim_grid_particles = - make_uint3(blocks_per_grid_particles_x, blocks_per_grid_particles_y, 1); - if (lbpar_gpu.kT > 0.0) { - assert(rng_counter_coupling_gpu); - KERNELCALL(calc_fluid_particle_ia, dim_grid_particles, - threads_per_block_particles, *current_nodes, device_particles, - gpu_get_particle_force_pointer(), node_f, device_rho_v, - couple_virtual, rng_counter_coupling_gpu->value(), - static_cast(friction)); - } else { - // We use a dummy value for the RNG counter if no temperature is set. - KERNELCALL(calc_fluid_particle_ia, dim_grid_particles, - threads_per_block_particles, *current_nodes, device_particles, - gpu_get_particle_force_pointer(), node_f, device_rho_v, - couple_virtual, 0, static_cast(friction)); - } + if (device_particles.empty()) { + return; + } + + dim3 dim_grid = calculate_dim_grid( + static_cast(device_particles.size()), 4, threads_per_block); + if (lbpar_gpu.kT > 0.0) { + assert(rng_counter_coupling_gpu); + KERNELCALL(calc_fluid_particle_ia, dim_grid, + threads_per_block, *current_nodes, device_particles, + gpu_get_particle_force_pointer(), node_f, device_rho_v, + couple_virtual, rng_counter_coupling_gpu->value(), + static_cast(friction)); + } else { + // We use a dummy value for the RNG counter if no temperature is set. 
+ KERNELCALL(calc_fluid_particle_ia, dim_grid, + threads_per_block, *current_nodes, device_particles, + gpu_get_particle_force_pointer(), node_f, device_rho_v, + couple_virtual, 0, static_cast(friction)); } } template void lb_calc_particle_lattice_ia_gpu<8>(bool couple_virtual, @@ -2624,13 +2537,8 @@ template void lb_calc_particle_lattice_ia_gpu<27>(bool couple_virtual, * @param host_values struct to save the gpu values */ void lb_get_values_GPU(LB_rho_v_pi_gpu *host_values) { - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(get_mesoscopic_values_in_LB_units, dim_grid, threads_per_block, *current_nodes, print_rho_v_pi, device_rho_v, node_f); @@ -2645,13 +2553,9 @@ void lb_get_boundary_flags_GPU(unsigned int *host_bound_array) { unsigned int *device_bound_array; cuda_safe_mem(cudaMalloc((void **)&device_bound_array, lbpar_gpu.number_of_nodes * sizeof(unsigned int))); - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(lb_get_boundaries, dim_grid, threads_per_block, *current_nodes, device_bound_array); @@ -2666,14 +2570,14 @@ void lb_get_boundary_flags_GPU(unsigned int *host_bound_array) { /** Setup and call kernel for getting macroscopic fluid values of a single * node */ -void lb_print_node_GPU(int single_nodeindex, +void 
lb_print_node_GPU(unsigned single_nodeindex, LB_rho_v_pi_gpu *host_print_values) { LB_rho_v_pi_gpu *device_print_values; cuda_safe_mem( cudaMalloc((void **)&device_print_values, sizeof(LB_rho_v_pi_gpu))); - int threads_per_block_print = 1; - int blocks_per_grid_print_y = 1; - int blocks_per_grid_print_x = 1; + unsigned threads_per_block_print = 1; + unsigned blocks_per_grid_print_y = 1; + unsigned blocks_per_grid_print_x = 1; dim3 dim_grid_print = make_uint3(blocks_per_grid_print_x, blocks_per_grid_print_y, 1); @@ -2696,13 +2600,8 @@ void lb_calc_fluid_mass_GPU(double *mass) { cuda_safe_mem( cudaMemcpy(tot_mass, &cpu_mass, sizeof(float), cudaMemcpyHostToDevice)); - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(calc_mass, dim_grid, threads_per_block, *current_nodes, tot_mass); @@ -2723,13 +2622,8 @@ void lb_calc_fluid_momentum_GPU(double *host_mom) { cuda_safe_mem(cudaMemcpy(tot_momentum, host_momentum, 3 * sizeof(float), cudaMemcpyHostToDevice)); - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(momentum, dim_grid, threads_per_block, *current_nodes, device_rho_v, node_f, tot_momentum); @@ -2747,7 +2641,7 @@ void lb_calc_fluid_momentum_GPU(double *host_mom) { * @param[out] host_checkpoint_vd LB populations */ void 
lb_save_checkpoint_GPU(float *const host_checkpoint_vd) { - cuda_safe_mem(cudaMemcpy(host_checkpoint_vd, current_nodes->vd, + cuda_safe_mem(cudaMemcpy(host_checkpoint_vd, current_nodes->populations, lbpar_gpu.number_of_nodes * 19 * sizeof(float), cudaMemcpyDeviceToHost)); } @@ -2759,7 +2653,7 @@ void lb_load_checkpoint_GPU(float const *const host_checkpoint_vd) { current_nodes = &nodes_a; intflag = true; - cuda_safe_mem(cudaMemcpy(current_nodes->vd, host_checkpoint_vd, + cuda_safe_mem(cudaMemcpy(current_nodes->populations, host_checkpoint_vd, lbpar_gpu.number_of_nodes * 19 * sizeof(float), cudaMemcpyHostToDevice)); } @@ -2768,12 +2662,13 @@ void lb_load_checkpoint_GPU(float const *const host_checkpoint_vd) { * @param single_nodeindex number of the node to get the flag for * @param host_flag here goes the value of the boundary flag */ -void lb_get_boundary_flag_GPU(int single_nodeindex, unsigned int *host_flag) { +void lb_get_boundary_flag_GPU(unsigned int single_nodeindex, + unsigned int *host_flag) { unsigned int *device_flag; cuda_safe_mem(cudaMalloc((void **)&device_flag, sizeof(unsigned int))); - int threads_per_block_flag = 1; - int blocks_per_grid_flag_y = 1; - int blocks_per_grid_flag_x = 1; + unsigned threads_per_block_flag = 1; + unsigned blocks_per_grid_flag_y = 1; + unsigned blocks_per_grid_flag_x = 1; dim3 dim_grid_flag = make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1); @@ -2790,10 +2685,10 @@ void lb_get_boundary_flag_GPU(int single_nodeindex, unsigned int *host_flag) { * @param single_nodeindex the node to set the velocity for * @param host_rho the density to set */ -void lb_set_node_rho_GPU(int single_nodeindex, float host_rho) { - int threads_per_block_flag = 1; - int blocks_per_grid_flag_y = 1; - int blocks_per_grid_flag_x = 1; +void lb_set_node_rho_GPU(unsigned single_nodeindex, float host_rho) { + unsigned threads_per_block_flag = 1; + unsigned blocks_per_grid_flag_y = 1; + unsigned blocks_per_grid_flag_x = 1; dim3 dim_grid_flag = 
make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1); KERNELCALL(set_rho, dim_grid_flag, threads_per_block_flag, *current_nodes, @@ -2804,14 +2699,14 @@ void lb_set_node_rho_GPU(int single_nodeindex, float host_rho) { * @param single_nodeindex the node to set the velocity for * @param host_velocity the velocity to set */ -void lb_set_node_velocity_GPU(int single_nodeindex, float *host_velocity) { +void lb_set_node_velocity_GPU(unsigned single_nodeindex, float *host_velocity) { float *device_velocity; cuda_safe_mem(cudaMalloc((void **)&device_velocity, 3 * sizeof(float))); cuda_safe_mem(cudaMemcpy(device_velocity, host_velocity, 3 * sizeof(float), cudaMemcpyHostToDevice)); - int threads_per_block_flag = 1; - int blocks_per_grid_flag_y = 1; - int blocks_per_grid_flag_x = 1; + unsigned threads_per_block_flag = 1; + unsigned blocks_per_grid_flag_y = 1; + unsigned blocks_per_grid_flag_x = 1; dim3 dim_grid_flag = make_uint3(blocks_per_grid_flag_x, blocks_per_grid_flag_y, 1); @@ -2839,13 +2734,8 @@ void reinit_parameters_GPU(LB_parameters_gpu *lbpar_gpu) { /** Integration kernel for the lb gpu fluid update called from host */ void lb_integrate_GPU() { - /* values for the kernel call */ - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = static_cast( - (lbpar_gpu.number_of_nodes + threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); #ifdef LB_BOUNDARIES_GPU if (!LBBoundaries::lbboundaries.empty()) { cuda_safe_mem( @@ -2887,19 +2777,14 @@ void lb_integrate_GPU() { #endif } -void lb_gpu_get_boundary_forces(double *forces) { +void lb_gpu_get_boundary_forces(std::vector &forces) { #ifdef LB_BOUNDARIES_GPU - auto *temp = (float *)Utils::malloc(3 * LBBoundaries::lbboundaries.size() * - sizeof(float)); - cuda_safe_mem( - cudaMemcpy(temp, 
lb_boundary_force, - 3 * LBBoundaries::lbboundaries.size() * sizeof(float), - cudaMemcpyDeviceToHost)); - - for (int i = 0; i < 3 * LBBoundaries::lbboundaries.size(); i++) { - forces[i] = -(double)temp[i]; - } - free(temp); + std::vector temp(3 * LBBoundaries::lbboundaries.size()); + cuda_safe_mem(cudaMemcpy(temp.data(), lb_boundary_force, + temp.size() * sizeof(float), + cudaMemcpyDeviceToHost)); + std::transform(temp.begin(), temp.end(), forces.begin(), + [](float val) { return -static_cast(val); }); #endif } @@ -2923,10 +2808,10 @@ struct lb_lbfluid_mass_of_particle { __global__ void lb_lbfluid_set_population_kernel(LB_nodes_gpu n_a, float const population[LBQ], int x, int y, int z) { - auto const index = xyz_to_index(x, y, z); + auto const index = static_cast(xyz_to_index(x, y, z)); - for (int i = 0; i < LBQ; ++i) { - n_a.vd[i * para->number_of_nodes + index] = population[i]; + for (unsigned i = 0; i < LBQ; ++i) { + n_a.populations[i * para->number_of_nodes + index] = population[i]; } } @@ -2958,10 +2843,10 @@ void lb_lbfluid_set_population(const Utils::Vector3i &xyz, __global__ void lb_lbfluid_get_population_kernel(LB_nodes_gpu n_a, float population[LBQ], int x, int y, int z) { - auto const index = xyz_to_index(x, y, z); + auto const index = static_cast(xyz_to_index(x, y, z)); - for (int i = 0; i < LBQ; ++i) { - population[i] = n_a.vd[i * para->number_of_nodes + index]; + for (unsigned i = 0; i < LBQ; ++i) { + population[i] = n_a.populations[i * para->number_of_nodes + index]; } } @@ -2986,8 +2871,8 @@ void lb_lbfluid_get_population(const Utils::Vector3i &xyz, /** * @brief Velocity interpolation functor - * @tparam no_of_neighbours The number of neighbours to consider for - * interpolation + * @tparam no_of_neighbours The number of neighbours to consider for + * interpolation */ template struct interpolation { LB_nodes_gpu current_nodes_gpu; @@ -3006,21 +2891,22 @@ template struct interpolation { template void lb_get_interpolated_velocity_gpu(double const 
*positions, double *velocities, int length) { - thrust::host_vector positions_host(length); - for (int p = 0; p < 3 * length; p += 3) { + auto const size = static_cast(length); + thrust::host_vector positions_host(size); + for (unsigned p = 0; p < 3 * size; p += 3) { // Cast double coming from python to float. positions_host[p / 3].x = static_cast(positions[p]); positions_host[p / 3].y = static_cast(positions[p + 1]); positions_host[p / 3].z = static_cast(positions[p + 2]); } thrust::device_vector positions_device = positions_host; - thrust::device_vector velocities_device(length); + thrust::device_vector velocities_device(size); thrust::transform( positions_device.begin(), positions_device.end(), velocities_device.begin(), interpolation(*current_nodes, device_rho_v)); thrust::host_vector velocities_host = velocities_device; - int index = 0; + unsigned index = 0; for (auto v : velocities_host) { velocities[index] = static_cast(v.x); velocities[index + 1] = static_cast(v.y); diff --git a/src/core/immersed_boundary/ImmersedBoundaries.cpp b/src/core/immersed_boundary/ImmersedBoundaries.cpp index 882e93183d7..1fa15773244 100644 --- a/src/core/immersed_boundary/ImmersedBoundaries.cpp +++ b/src/core/immersed_boundary/ImmersedBoundaries.cpp @@ -97,6 +97,9 @@ static const IBM_VolCons_Parameters *vol_cons_parameters(Particle const &p1) { */ void ImmersedBoundaries::calc_volumes(CellStructure &cs) { + if (!BoundariesFound) + return; + // Partial volumes for each soft particle, to be summed up std::vector tempVol(IBM_MAX_NUM); @@ -153,6 +156,9 @@ void ImmersedBoundaries::calc_volumes(CellStructure &cs) { /** Calculate and add the volume force to each node */ void ImmersedBoundaries::calc_volume_force(CellStructure &cs) { + if (!BoundariesFound) + return; + cs.bond_loop( [this](Particle &p1, int bond_id, Utils::Span partners) { if (boost::get(&bonded_ia_params[bond_id]) != diff --git a/src/core/immersed_boundary/ImmersedBoundaries.hpp 
b/src/core/immersed_boundary/ImmersedBoundaries.hpp index 0406532935b..ba6d815b0a4 100644 --- a/src/core/immersed_boundary/ImmersedBoundaries.hpp +++ b/src/core/immersed_boundary/ImmersedBoundaries.hpp @@ -27,7 +27,7 @@ class ImmersedBoundaries { public: - ImmersedBoundaries() : VolumeInitDone(false) { + ImmersedBoundaries() : VolumeInitDone(false), BoundariesFound(false) { VolumesCurrent.resize(IBM_MAX_NUM); } void init_volume_conservation(CellStructure &cs); @@ -38,8 +38,8 @@ class ImmersedBoundaries { void calc_volume_force(CellStructure &cs); std::vector VolumesCurrent; - bool VolumeInitDone = false; - bool BoundariesFound = false; + bool VolumeInitDone; + bool BoundariesFound; }; #endif diff --git a/src/core/immersed_boundary/ibm_tribend.cpp b/src/core/immersed_boundary/ibm_tribend.cpp index 23b98bb259c..6c8c7c82e77 100644 --- a/src/core/immersed_boundary/ibm_tribend.cpp +++ b/src/core/immersed_boundary/ibm_tribend.cpp @@ -25,6 +25,7 @@ #include +#include #include #include @@ -52,9 +53,7 @@ IBM_Tribend_Parameters::calc_forces(Particle const &p1, Particle const &p2, n2 /= Aj; // Get the prefactor for the force term - auto sc = n1 * n2; - if (sc > 1.0) - sc = 1.0; + auto const sc = std::min(1.0, n1 * n2); // Get theta as angle between normals auto theta = acos(sc); @@ -119,9 +118,7 @@ IBM_Tribend_Parameters::IBM_Tribend_Parameters(const int ind1, const int ind2, auto const n2 = n2l / n2l.norm(); // calculate theta0 by taking the acos of the scalar n1*n2 - auto sc = n1 * n2; - if (sc > 1.0) - sc = 1.0; + auto const sc = std::min(1.0, n1 * n2); theta0 = acos(sc); diff --git a/src/core/integrators/velocity_verlet_inline.hpp b/src/core/integrators/velocity_verlet_inline.hpp index 23a3e6f95a1..8d739c66e4e 100644 --- a/src/core/integrators/velocity_verlet_inline.hpp +++ b/src/core/integrators/velocity_verlet_inline.hpp @@ -40,7 +40,7 @@ inline void velocity_verlet_propagate_vel_pos(const ParticleRange &particles) { auto const skin2 = Utils::sqr(0.5 * skin); for (auto 
&p : particles) { #ifdef ROTATION - propagate_omega_quat_particle(p); + propagate_omega_quat_particle(p, time_step); #endif // Don't propagate translational degrees of freedom of vs @@ -91,7 +91,7 @@ inline void velocity_verlet_step_1(const ParticleRange &particles) { inline void velocity_verlet_step_2(const ParticleRange &particles) { velocity_verlet_propagate_vel_final(particles); #ifdef ROTATION - convert_torques_propagate_omega(particles); + convert_torques_propagate_omega(particles, time_step); #endif } diff --git a/src/core/integrators/velocity_verlet_npt.cpp b/src/core/integrators/velocity_verlet_npt.cpp index 13b782679ee..815e5896ad8 100644 --- a/src/core/integrators/velocity_verlet_npt.cpp +++ b/src/core/integrators/velocity_verlet_npt.cpp @@ -167,7 +167,7 @@ void velocity_verlet_npt_propagate_vel(const ParticleRange &particles) { for (auto &p : particles) { #ifdef ROTATION - propagate_omega_quat_particle(p); + propagate_omega_quat_particle(p, time_step); #endif // Don't propagate translational degrees of freedom of vs @@ -198,7 +198,7 @@ void velocity_verlet_npt_step_1(const ParticleRange &particles) { void velocity_verlet_npt_step_2(const ParticleRange &particles) { velocity_verlet_npt_propagate_vel_final(particles); #ifdef ROTATION - convert_torques_propagate_omega(particles); + convert_torques_propagate_omega(particles, time_step); #endif velocity_verlet_npt_finalize_p_inst(); } diff --git a/src/core/io/mpiio/mpiio.hpp b/src/core/io/mpiio/mpiio.hpp index 40ae48a66ef..be678886bd5 100644 --- a/src/core/io/mpiio/mpiio.hpp +++ b/src/core/io/mpiio/mpiio.hpp @@ -44,6 +44,7 @@ enum MPIIOOutputFields : unsigned int { * * \param filename A null-terminated filename prefix. * \param fields Output specifier which fields to dump. + * \param particles range of particles to serialize. 
*/ void mpi_mpiio_common_write(const char *filename, unsigned fields, const ParticleRange &particles); diff --git a/src/core/observables/CylindricalDensityProfile.hpp b/src/core/observables/CylindricalDensityProfile.hpp index d18ea8baeac..a7d1de6e312 100644 --- a/src/core/observables/CylindricalDensityProfile.hpp +++ b/src/core/observables/CylindricalDensityProfile.hpp @@ -41,7 +41,9 @@ class CylindricalDensityProfile : public CylindricalPidProfileObservable { for (auto p : particles) { histogram.update(Utils::transform_coordinate_cartesian_to_cylinder( - folded_position(traits.position(p), box_geo) - center, axis)); + folded_position(traits.position(p), box_geo) - + transform_params->center(), + transform_params->axis(), transform_params->orientation())); } histogram.normalize(); diff --git a/src/core/observables/CylindricalFluxDensityProfile.hpp b/src/core/observables/CylindricalFluxDensityProfile.hpp index 13ef33b9178..73b65f08c81 100644 --- a/src/core/observables/CylindricalFluxDensityProfile.hpp +++ b/src/core/observables/CylindricalFluxDensityProfile.hpp @@ -43,11 +43,13 @@ class CylindricalFluxDensityProfile : public CylindricalPidProfileObservable { // Write data to the histogram for (auto p : particles) { - auto const pos = folded_position(traits.position(p), box_geo) - center; + auto const pos = folded_position(traits.position(p), box_geo) - + transform_params->center(); histogram.update( - Utils::transform_coordinate_cartesian_to_cylinder(pos, axis), - Utils::transform_vector_cartesian_to_cylinder(traits.velocity(p), - axis, pos)); + Utils::transform_coordinate_cartesian_to_cylinder( + pos, transform_params->axis(), transform_params->orientation()), + Utils::transform_vector_cartesian_to_cylinder( + traits.velocity(p), transform_params->axis(), pos)); } histogram.normalize(); return histogram.get_histogram(); diff --git a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp 
b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp index de51d4a9e10..5ef211f40db 100644 --- a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp +++ b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp @@ -42,13 +42,24 @@ CylindricalLBFluxDensityProfileAtParticlePositions::evaluate( auto const pos = folded_position(traits.position(p), box_geo); auto const v = lb_lbfluid_get_interpolated_velocity(pos) * lb_lbfluid_get_lattice_speed(); + auto const flux_dens = lb_lbfluid_get_interpolated_density(pos) * v; - histogram.update( - Utils::transform_coordinate_cartesian_to_cylinder(pos - center, axis), - Utils::transform_vector_cartesian_to_cylinder(v, axis, pos - center)); + histogram.update(Utils::transform_coordinate_cartesian_to_cylinder( + pos - transform_params->center(), + transform_params->axis(), + transform_params->orientation()), + Utils::transform_vector_cartesian_to_cylinder( + flux_dens, transform_params->axis(), + pos - transform_params->center())); } - histogram.normalize(); - return histogram.get_histogram(); + // normalize by number of hits per bin + auto hist_tmp = histogram.get_histogram(); + auto tot_count = histogram.get_tot_count(); + std::transform(hist_tmp.begin(), hist_tmp.end(), tot_count.begin(), + hist_tmp.begin(), [](auto hi, auto ci) { + return ci > 0 ? 
hi / static_cast(ci) : 0.; + }); + return hist_tmp; } } // namespace Observables diff --git a/src/core/observables/CylindricalLBProfileObservable.hpp b/src/core/observables/CylindricalLBProfileObservable.hpp index c1d5eea4d7e..df4e66e5936 100644 --- a/src/core/observables/CylindricalLBProfileObservable.hpp +++ b/src/core/observables/CylindricalLBProfileObservable.hpp @@ -21,6 +21,7 @@ #include "CylindricalProfileObservable.hpp" +#include #include #include #include @@ -30,15 +31,15 @@ namespace Observables { class CylindricalLBProfileObservable : public CylindricalProfileObservable { public: - CylindricalLBProfileObservable(Utils::Vector3d const ¢er, - Utils::Vector3d const &axis, int n_r_bins, - int n_phi_bins, int n_z_bins, double min_r, - double max_r, double min_phi, double max_phi, - double min_z, double max_z, - double sampling_density) - : CylindricalProfileObservable(center, axis, n_r_bins, n_phi_bins, - n_z_bins, min_r, max_r, min_phi, max_phi, - min_z, max_z), + CylindricalLBProfileObservable( + std::shared_ptr + transform_params, + int n_r_bins, int n_phi_bins, int n_z_bins, double min_r, double max_r, + double min_phi, double max_phi, double min_z, double max_z, + double sampling_density) + : CylindricalProfileObservable(std::move(transform_params), n_r_bins, + n_phi_bins, n_z_bins, min_r, max_r, + min_phi, max_phi, min_z, max_z), sampling_density(sampling_density) { calculate_sampling_positions(); } @@ -47,17 +48,16 @@ class CylindricalLBProfileObservable : public CylindricalProfileObservable { limits[0], limits[1], limits[2], n_bins[0], n_bins[1], n_bins[2], sampling_density); for (auto &p : sampling_positions) { - double theta; - Utils::Vector3d rotation_axis; - auto p_cart = Utils::transform_coordinate_cylinder_to_cartesian( - p, Utils::Vector3d{{0.0, 0.0, 1.0}}); + auto p_cart = Utils::transform_coordinate_cylinder_to_cartesian(p); // We have to rotate the coordinates since the utils function assumes // z-axis symmetry. 
- std::tie(theta, rotation_axis) = - Utils::rotation_params(Utils::Vector3d{{0.0, 0.0, 1.0}}, axis); + constexpr Utils::Vector3d z_axis{{0.0, 0.0, 1.0}}; + auto const theta = Utils::angle_between(z_axis, transform_params->axis()); + auto const rot_axis = + Utils::vector_product(z_axis, transform_params->axis()).normalize(); if (theta > std::numeric_limits::epsilon()) - p_cart = Utils::vec_rotate(rotation_axis, theta, p_cart); - p = p_cart + center; + p_cart = Utils::vec_rotate(rot_axis, theta, p_cart); + p = p_cart + transform_params->center(); } } std::vector sampling_positions; diff --git a/src/core/observables/CylindricalLBVelocityProfile.cpp b/src/core/observables/CylindricalLBVelocityProfile.cpp index 28148f5d493..791417859b8 100644 --- a/src/core/observables/CylindricalLBVelocityProfile.cpp +++ b/src/core/observables/CylindricalLBVelocityProfile.cpp @@ -35,11 +35,12 @@ std::vector CylindricalLBVelocityProfile::operator()() const { for (auto const &p : sampling_positions) { auto const velocity = lb_lbfluid_get_interpolated_velocity(p) * lb_lbfluid_get_lattice_speed(); - auto const pos_shifted = p - center; - auto const pos_cyl = - Utils::transform_coordinate_cartesian_to_cylinder(pos_shifted, axis); - histogram.update(pos_cyl, Utils::transform_vector_cartesian_to_cylinder( - velocity, axis, pos_shifted)); + auto const pos_shifted = p - transform_params->center(); + auto const pos_cyl = Utils::transform_coordinate_cartesian_to_cylinder( + pos_shifted, transform_params->axis(), transform_params->orientation()); + histogram.update(pos_cyl, + Utils::transform_vector_cartesian_to_cylinder( + velocity, transform_params->axis(), pos_shifted)); } auto hist_data = histogram.get_histogram(); auto const tot_count = histogram.get_tot_count(); diff --git a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp index e1106574cfa..9650c7dda6f 100644 --- 
a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp +++ b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp @@ -41,17 +41,20 @@ std::vector CylindricalLBVelocityProfileAtParticlePositions::evaluate( lb_lbfluid_get_lattice_speed(); histogram.update( - Utils::transform_coordinate_cartesian_to_cylinder(pos - center, axis), - Utils::transform_vector_cartesian_to_cylinder(v, axis, pos - center)); + Utils::transform_coordinate_cartesian_to_cylinder( + pos - transform_params->center(), transform_params->axis(), + transform_params->orientation()), + Utils::transform_vector_cartesian_to_cylinder( + v, transform_params->axis(), pos - transform_params->center())); } + // normalize by number of hits per bin auto hist_tmp = histogram.get_histogram(); auto tot_count = histogram.get_tot_count(); - for (size_t ind = 0; ind < hist_tmp.size(); ++ind) { - if (tot_count[ind] > 0) { - hist_tmp[ind] /= static_cast(tot_count[ind]); - } - } + std::transform(hist_tmp.begin(), hist_tmp.end(), tot_count.begin(), + hist_tmp.begin(), [](auto hi, auto ci) { + return ci > 0 ? 
hi / static_cast(ci) : 0.; + }); return hist_tmp; } diff --git a/src/core/observables/CylindricalPidProfileObservable.hpp b/src/core/observables/CylindricalPidProfileObservable.hpp index bbece2be6e5..8a3ae7886e1 100644 --- a/src/core/observables/CylindricalPidProfileObservable.hpp +++ b/src/core/observables/CylindricalPidProfileObservable.hpp @@ -19,6 +19,8 @@ #ifndef OBSERVABLES_CYLINDRICALPIDPROFILEOBSERVABLE_HPP #define OBSERVABLES_CYLINDRICALPIDPROFILEOBSERVABLE_HPP +#include + #include "CylindricalProfileObservable.hpp" #include "PidObservable.hpp" @@ -27,16 +29,16 @@ namespace Observables { class CylindricalPidProfileObservable : public PidObservable, public CylindricalProfileObservable { public: - CylindricalPidProfileObservable(std::vector const &ids, - Utils::Vector3d const ¢er, - Utils::Vector3d const &axis, int n_r_bins, - int n_phi_bins, int n_z_bins, double min_r, - double max_r, double min_phi, double max_phi, - double min_z, double max_z) + CylindricalPidProfileObservable( + std::vector const &ids, + std::shared_ptr + transform_params, + int n_r_bins, int n_phi_bins, int n_z_bins, double min_r, double max_r, + double min_phi, double max_phi, double min_z, double max_z) : PidObservable(ids), - CylindricalProfileObservable(center, axis, n_r_bins, n_phi_bins, - n_z_bins, min_r, max_r, min_phi, max_phi, - min_z, max_z) {} + CylindricalProfileObservable(std::move(transform_params), n_r_bins, + n_phi_bins, n_z_bins, min_r, max_r, + min_phi, max_phi, min_z, max_z) {} }; } // Namespace Observables diff --git a/src/core/observables/CylindricalProfileObservable.hpp b/src/core/observables/CylindricalProfileObservable.hpp index c28669a4e62..224856f0cfd 100644 --- a/src/core/observables/CylindricalProfileObservable.hpp +++ b/src/core/observables/CylindricalProfileObservable.hpp @@ -22,12 +22,17 @@ #include "ProfileObservable.hpp" #include +#include +#include #include #include #include #include +#include +#include +#include #include namespace Observables { @@ 
-35,16 +40,16 @@ namespace Observables { /** Cylindrical profile observable */ class CylindricalProfileObservable : public ProfileObservable { public: - CylindricalProfileObservable(Utils::Vector3d const ¢er, - Utils::Vector3d const &axis, int n_r_bins, - int n_phi_bins, int n_z_bins, double min_r, - double max_r, double min_phi, double max_phi, - double min_z, double max_z) + CylindricalProfileObservable( + std::shared_ptr + transform_params, + int n_r_bins, int n_phi_bins, int n_z_bins, double min_r, double max_r, + double min_phi, double max_phi, double min_z, double max_z) : ProfileObservable(n_r_bins, n_phi_bins, n_z_bins, min_r, max_r, min_phi, max_phi, min_z, max_z), - center(center), axis(axis) {} - Utils::Vector3d center; - Utils::Vector3d axis; + transform_params(std::move(transform_params)) {} + + std::shared_ptr transform_params; }; } // Namespace Observables diff --git a/src/core/observables/CylindricalVelocityProfile.hpp b/src/core/observables/CylindricalVelocityProfile.hpp index 58dfa0bc5fc..6b70dd2feaf 100644 --- a/src/core/observables/CylindricalVelocityProfile.hpp +++ b/src/core/observables/CylindricalVelocityProfile.hpp @@ -43,11 +43,13 @@ class CylindricalVelocityProfile : public CylindricalPidProfileObservable { Utils::CylindricalHistogram histogram(n_bins, 3, limits); for (auto p : particles) { - auto const pos = folded_position(traits.position(p), box_geo) - center; + auto const pos = folded_position(traits.position(p), box_geo) - + transform_params->center(); histogram.update( - Utils::transform_coordinate_cartesian_to_cylinder(pos, axis), - Utils::transform_vector_cartesian_to_cylinder(traits.velocity(p), - axis, pos)); + Utils::transform_coordinate_cartesian_to_cylinder( + pos, transform_params->axis(), transform_params->orientation()), + Utils::transform_vector_cartesian_to_cylinder( + traits.velocity(p), transform_params->axis(), pos)); } auto hist_tmp = histogram.get_histogram(); diff --git 
a/src/core/observables/ParticleAngularVelocities.hpp b/src/core/observables/ParticleAngularVelocities.hpp index 03065a21811..ffaae395840 100644 --- a/src/core/observables/ParticleAngularVelocities.hpp +++ b/src/core/observables/ParticleAngularVelocities.hpp @@ -20,7 +20,6 @@ #define OBSERVABLES_PARTICLEANGULARVELOCITIES_HPP #include "PidObservable.hpp" -#include "integrate.hpp" #include "rotation.hpp" #include diff --git a/src/core/observables/ParticleBodyAngularVelocities.hpp b/src/core/observables/ParticleBodyAngularVelocities.hpp index e082d37ea25..22af41b07a6 100644 --- a/src/core/observables/ParticleBodyAngularVelocities.hpp +++ b/src/core/observables/ParticleBodyAngularVelocities.hpp @@ -20,7 +20,6 @@ #define OBSERVABLES_PARTICLEBODYANGULARVELOCITIES_HPP #include "PidObservable.hpp" -#include "integrate.hpp" #include diff --git a/src/core/observables/ParticleBodyVelocities.hpp b/src/core/observables/ParticleBodyVelocities.hpp index 772139c12b4..e95c016fbf4 100644 --- a/src/core/observables/ParticleBodyVelocities.hpp +++ b/src/core/observables/ParticleBodyVelocities.hpp @@ -20,7 +20,6 @@ #define OBSERVABLES_PARTICLEBODYVELOCITIES_HPP #include "PidObservable.hpp" -#include "integrate.hpp" #include "rotation.hpp" diff --git a/src/core/observables/ParticleForces.hpp b/src/core/observables/ParticleForces.hpp index 2c953961d35..962531c8772 100644 --- a/src/core/observables/ParticleForces.hpp +++ b/src/core/observables/ParticleForces.hpp @@ -21,7 +21,6 @@ #include "Particle.hpp" #include "PidObservable.hpp" -#include "integrate.hpp" #include #include diff --git a/src/core/particle_data.cpp b/src/core/particle_data.cpp index 73db3218e78..a185fa71eff 100644 --- a/src/core/particle_data.cpp +++ b/src/core/particle_data.cpp @@ -1021,14 +1021,6 @@ void mpi_rescale_particles(int dir, double scale) { * @param _delete if true, delete the exclusion instead of add */ void local_change_exclusion(int part1, int part2, int _delete) { - if (part1 == -1 && part2 == -1) { - for 
(auto &p : cell_structure.local_particles()) { - p.exclusions().clear(); - } - - return; - } - /* part1, if here */ auto part = cell_structure.get_local_particle(part1); if (part) { @@ -1088,8 +1080,6 @@ int change_exclusion(int part1, int part2, int _delete) { return ES_ERROR; } -void remove_all_exclusions() { mpi_send_exclusion(-1, -1, 1); } - void auto_exclusions(int distance) { /* partners is a list containing the currently found excluded particles for each particle, and their distance, as an interleaved list */ diff --git a/src/core/particle_data.hpp b/src/core/particle_data.hpp index 8fab4251da9..6eeb12c4eb3 100644 --- a/src/core/particle_data.hpp +++ b/src/core/particle_data.hpp @@ -321,9 +321,6 @@ const std::vector &get_particle_bonds(int part); * exclusion set) */ int change_exclusion(int part, int part2, int _delete); - -/** remove all exclusions. */ -void remove_all_exclusions(); #endif /** Remove particle with a given identity. Also removes all bonds to the diff --git a/src/core/polymer.hpp b/src/core/polymer.hpp index d6cab6cfbcd..17152c2b6a6 100644 --- a/src/core/polymer.hpp +++ b/src/core/polymer.hpp @@ -36,14 +36,18 @@ #include /** Determines valid polymer positions and returns them. 
+ * @param partCfg particle collection * @param n_polymers how many polymers to create * @param beads_per_chain monomers per chain * @param bond_length length of the bonds between two monomers - * @param seed seed for RNG + * @param start_positions starting positions of each polymers * @param min_distance minimum distance between all particles * @param max_tries how often a monomer/polymer should be reset if * current position collides with a previous particle + * @param use_bond_angle whether to use the @p bond_angle argument * @param bond_angle desired bond-angle to be fixed + * @param respect_constraints whether to respect constraints + * @param seed seed for RNG */ std::vector> draw_polymer_positions(PartCfg &partCfg, int n_polymers, int beads_per_chain, diff --git a/src/core/rattle.cpp b/src/core/rattle.cpp index 45dee5a6663..cfce8583142 100644 --- a/src/core/rattle.cpp +++ b/src/core/rattle.cpp @@ -37,39 +37,6 @@ #include -/** \name Private functions */ -/************************************************************/ -/**@{*/ - -/** Positional Corrections are added to the current particle positions. Invoked - * from \ref correct_pos_shake() */ -static void app_pos_correction(const ParticleRange &particles); - -/** Transfers temporarily the current forces from f.f[3] of the \ref Particle - structure to r.p_old[3] location and also initializes velocity correction - vector. Invoked from \ref correct_vel_shake()*/ -static void transfer_force_init_vel(const ParticleRange &particles, - const ParticleRange &ghost_particles); - -/** Calculates corrections of the current particle velocities according to - RATTLE - algorithm. Invoked from \ref correct_vel_shake()*/ -static void compute_vel_corr_vec(int *repeat_, CellStructure &cs); - -/** Velocity corrections are added to the current particle velocities. Invoked - from - \ref correct_vel_shake()*/ -static void apply_vel_corr(const ParticleRange &particles); - -/**Invoked from \ref correct_vel_shake(). 
Put back the forces from r.p_old to - * f.f*/ -static void revert_force(const ParticleRange &particles, - const ParticleRange &ghost_particles); - -/**@}*/ - -/*Initialize old positions (particle positions at previous time step) - of the particles*/ void save_old_pos(const ParticleRange &particles, const ParticleRange &ghost_particles) { auto save_pos = [](Particle &p) { @@ -84,8 +51,9 @@ void save_old_pos(const ParticleRange &particles, save_pos(p); } -/**Initialize the correction vector. The correction vector is stored in f.f of - * particle structure. */ +/** Initialize the velocity correction vectors. The correction vectors are + * stored in @ref ParticleForce::f "Particle::f::f". + */ static void init_correction_vector(const ParticleRange &local_particles, const ParticleRange &ghost_particles) { auto reset_force = [](Particle &p) { @@ -131,7 +99,8 @@ static bool add_pos_corr_vec(Rigid_bond_parameters const &ia_params, return false; } -/**Compute positional corrections*/ + +/** Compute position corrections */ static void compute_pos_corr_vec(int *repeat_, CellStructure &cs) { cs.bond_loop( [repeat_](Particle &p1, int bond_id, Utils::Span partners) { @@ -148,20 +117,16 @@ static void compute_pos_corr_vec(int *repeat_, CellStructure &cs) { }); } -/**Apply corrections to each particle**/ +/** Apply position corrections */ static void app_pos_correction(const ParticleRange &particles) { - /*Apply corrections*/ for (auto &p : particles) { for (int j = 0; j < 3; j++) { p.r.p[j] += p.f.f[j]; p.m.v[j] += p.f.f[j]; } - /**Completed for one particle*/ - } // for i loop + } } -/** Calculates the corrections required for each of the particle coordinates - according to the RATTLE algorithm. 
Invoked from \ref correct_pos_shake()*/ void correct_pos_shake(CellStructure &cs) { cells_update_ghosts(Cells::DATA_PART_POSITION | Cells::DATA_PART_PROPERTIES); @@ -178,7 +143,7 @@ void correct_pos_shake(CellStructure &cs) { cell_structure.ghosts_reduce_forces(); app_pos_correction(particles); - /**Ghost Positions Update*/ + /* Ghost Positions Update */ cs.ghosts_update(Cells::DATA_PART_POSITION | Cells::DATA_PART_MOMENTUM); repeat = boost::mpi::all_reduce(comm_cart, (repeat_ > 0), @@ -194,11 +159,10 @@ void correct_pos_shake(CellStructure &cs) { check_resort_particles(); } -/**The forces are transferred temporarily from f.f member of particle structure - to r.p_old, - which is idle now and initialize the velocity correction vector to zero at - f.f[3] - of Particle structure*/ +/** Transfer the current forces from @ref ParticleForce::f "Particle::f::f" + * to @ref ParticlePosition::p_old "Particle::r::p_old" and reset the + * velocity correction vectors at @ref ParticleForce::f "Particle::f::f". 
+ */ static void transfer_force_init_vel(const ParticleRange &particles, const ParticleRange &ghost_particles) { auto copy_reset = [](Particle &p) { @@ -246,7 +210,7 @@ static bool add_vel_corr_vec(Rigid_bond_parameters const &ia_params, return false; } -/** Velocity correction vectors are computed*/ +/** Compute velocity correction vectors */ static void compute_vel_corr_vec(int *repeat_, CellStructure &cs) { cs.bond_loop( [repeat_](Particle &p1, int bond_id, Utils::Span partners) { @@ -258,23 +222,21 @@ static void compute_vel_corr_vec(int *repeat_, CellStructure &cs) { *repeat_ += 1; } - /* Rigid bonds can not break */ + /* Rigid bonds cannot break */ return false; }); } -/**Apply velocity corrections*/ +/** Apply velocity corrections */ static void apply_vel_corr(const ParticleRange &particles) { - /*Apply corrections*/ for (auto &p : particles) { for (int j = 0; j < 3; j++) { p.m.v[j] += p.f.f[j]; } - /**Completed for one particle*/ - } // for i loop + } } -/**Put back the forces from r.p_old to f.f*/ +/** Put back the forces from r.p_old to f.f */ static void revert_force(const ParticleRange &particles, const ParticleRange &ghost_particles) { auto revert = [](Particle &p) { @@ -292,9 +254,9 @@ static void revert_force(const ParticleRange &particles, void correct_vel_shake(CellStructure &cs) { cs.ghosts_update(Cells::DATA_PART_POSITION | Cells::DATA_PART_MOMENTUM); - /**transfer the current forces to r.p_old of the particle structure so that - velocity corrections can be stored temporarily at the f.f[3] of the particle - structure */ + /* transfer the current forces to r.p_old of the particle structure so that + * velocity corrections can be stored temporarily at the f.f member of the + * particle structure */ auto particles = cs.local_particles(); auto ghost_particles = cs.ghost_particles(); @@ -322,7 +284,6 @@ void correct_vel_shake(CellStructure &cs) { this_node, cnt); errexit(); } - /**Puts back the forces from r.p_old to f.f[3]*/ revert_force(particles, 
ghost_particles); } diff --git a/src/core/rattle.hpp b/src/core/rattle.hpp index 88bf09b978b..9d193ee83a9 100644 --- a/src/core/rattle.hpp +++ b/src/core/rattle.hpp @@ -34,8 +34,9 @@ #ifdef BOND_CONSTRAINT -/** Transfers the current particle positions from r.p[3] to r.p_pold[3] - of the \ref Particle structure. Invoked from \ref correct_pos_shake() */ +/** Transfer the current particle positions from @ref ParticlePosition::p + * "Particle::r::p" to @ref ParticlePosition::p_old "Particle::r::p_old" + */ void save_old_pos(const ParticleRange &particles, const ParticleRange &ghost_particles); diff --git a/src/core/rotation.cpp b/src/core/rotation.cpp index c185e23cc26..86223f6c15a 100644 --- a/src/core/rotation.cpp +++ b/src/core/rotation.cpp @@ -34,7 +34,6 @@ #include "rotation.hpp" #ifdef ROTATION -#include "integrate.hpp" #include #include @@ -116,13 +115,13 @@ static void define_Qdd(Particle const &p, Utils::Quaternion &Qd, * notation for quaternions, while @cite omelyan98a uses scalar-last * notation. * - * For very high angular velocities (e.g. if the product of @ref time_step + * For very high angular velocities (e.g. if the product of @p time_step * with the largest component of @ref ParticleMomentum::omega "p.m.omega" * is superior to ~2.0), the calculation might fail. * * \todo implement for fixed_coord_flag */ -void propagate_omega_quat_particle(Particle &p) { +void propagate_omega_quat_particle(Particle &p, double time_step) { // If rotation for the particle is disabled entirely, return early. if (p.p.rotation == ROTATION_FIXED) @@ -159,7 +158,8 @@ void propagate_omega_quat_particle(Particle &p) { } } -void convert_torques_propagate_omega(const ParticleRange &particles) { +void convert_torques_propagate_omega(const ParticleRange &particles, + double time_step) { for (auto &p : particles) { // Skip particle if rotation is turned off entirely for it. 
if (p.p.rotation == ROTATION_FIXED) diff --git a/src/core/rotation.hpp b/src/core/rotation.hpp index c8f6d135946..ada1217f5e3 100644 --- a/src/core/rotation.hpp +++ b/src/core/rotation.hpp @@ -43,12 +43,13 @@ /** @brief Propagate angular velocities and update quaternions on a * particle. */ -void propagate_omega_quat_particle(Particle &p); +void propagate_omega_quat_particle(Particle &p, double time_step); /** @brief Convert torques to the body-fixed frame and propagate * angular velocities. */ -void convert_torques_propagate_omega(const ParticleRange &particles); +void convert_torques_propagate_omega(const ParticleRange &particles, + double time_step); /** Convert torques to the body-fixed frame before the integration loop. */ void convert_initial_torques(const ParticleRange &particles); diff --git a/src/core/short_range_loop.hpp b/src/core/short_range_loop.hpp index b11131d60aa..fc52ee3eb03 100644 --- a/src/core/short_range_loop.hpp +++ b/src/core/short_range_loop.hpp @@ -31,7 +31,7 @@ namespace detail { * any arguments. */ struct True { - template bool operator()(T...) const { return true; } + template bool operator()(T &...) const { return true; } }; } // namespace detail diff --git a/src/core/statistics.hpp b/src/core/statistics.hpp index 09c1290031e..d24c9a50f5a 100644 --- a/src/core/statistics.hpp +++ b/src/core/statistics.hpp @@ -32,6 +32,7 @@ /** Calculate the minimal distance of two particles with types in set1 resp. * set2. + * @param partCfg particle collection. * @param set1 types of particles * @param set2 types of particles * @return the minimal distance of two particles @@ -68,6 +69,7 @@ double distto(PartCfg &partCfg, const Utils::Vector3d &pos, int pid = -1); * into @p r_bins bins which are either equidistant (@p log_flag==false) or * logarithmically equidistant (@p log_flag==true). The result is stored * in the @p array dist. + * @param partCfg particle collection. * @param p1_types list with types of particles to find the distribution for. 
* @param p2_types list with types of particles the others are distributed * around. @@ -96,6 +98,7 @@ void calc_part_distribution(PartCfg &partCfg, std::vector const &p1_types, * and sf[1]=1. For q=7, there are no possible wave vectors, so * sf[2*(7-1)]=sf[2*(7-1)+1]=0. * + * @param partCfg particle collection * @param p_types list with types of particles to be analyzed * @param order the maximum wave vector length in 2PI/L */ diff --git a/src/core/stokesian_dynamics/sd_interface.cpp b/src/core/stokesian_dynamics/sd_interface.cpp index 626bbdc3fa6..86c4bcdfa1c 100644 --- a/src/core/stokesian_dynamics/sd_interface.cpp +++ b/src/core/stokesian_dynamics/sd_interface.cpp @@ -119,8 +119,6 @@ void set_sd_viscosity(double eta) { sd_viscosity = eta; } -double get_sd_viscosity() { return sd_viscosity; } - void set_sd_radius_dict(std::unordered_map const &x) { /* Check that radii are positive */ for (auto const &kv : x) { @@ -134,8 +132,6 @@ void set_sd_radius_dict(std::unordered_map const &x) { radius_dict = x; } -std::unordered_map get_sd_radius_dict() { return radius_dict; } - void set_sd_kT(double kT) { if (kT < 0.0) { throw std::runtime_error("kT has an invalid value: " + std::to_string(kT)); @@ -148,8 +144,6 @@ double get_sd_kT() { return sd_kT; } void set_sd_flags(int flg) { sd_flags = flg; } -int get_sd_flags() { return sd_flags; } - void propagate_vel_pos_sd(const ParticleRange &particles, const boost::mpi::communicator &comm, const double time_step) { diff --git a/src/core/stokesian_dynamics/sd_interface.hpp b/src/core/stokesian_dynamics/sd_interface.hpp index 01219147538..d9107a95bdd 100644 --- a/src/core/stokesian_dynamics/sd_interface.hpp +++ b/src/core/stokesian_dynamics/sd_interface.hpp @@ -35,16 +35,13 @@ #include void set_sd_viscosity(double eta); -double get_sd_viscosity(); void set_sd_radius_dict(std::unordered_map const &x); -std::unordered_map get_sd_radius_dict(); void set_sd_kT(double kT); double get_sd_kT(); void set_sd_flags(int flg); -int 
get_sd_flags(); /** Takes the forces and torques on all particles and computes their * velocities. Acts globally on particles on all nodes; i.e. particle data diff --git a/src/core/unit_tests/Particle_test.cpp b/src/core/unit_tests/Particle_test.cpp index 1f1fe77b6cc..a73eb0ad252 100644 --- a/src/core/unit_tests/Particle_test.cpp +++ b/src/core/unit_tests/Particle_test.cpp @@ -122,3 +122,68 @@ BOOST_AUTO_TEST_CASE(properties_serialization) { BOOST_CHECK_EQUAL(out.identity, prop.identity); } } + +void check_particle_force(ParticleForce const &out, ParticleForce const &ref) { + BOOST_TEST(out.f == ref.f, boost::test_tools::per_element()); +#ifdef ROTATION + BOOST_TEST(out.torque == ref.torque, boost::test_tools::per_element()); +#endif +} + +namespace Utils { +template <> +struct is_statically_serializable : std::true_type {}; +} // namespace Utils + +BOOST_AUTO_TEST_CASE(force_serialization) { + auto const expected_size = + Utils::MemcpyOArchive::packing_size(); + + BOOST_CHECK_LE(expected_size, sizeof(ParticleForce)); + + std::vector buf(expected_size); + + auto pf = ParticleForce{{1, 2, 3}}; +#ifdef ROTATION + pf.torque = {4, 5, 6}; +#endif + + { + auto oa = Utils::MemcpyOArchive{Utils::make_span(buf)}; + + oa << pf; + + BOOST_CHECK_EQUAL(oa.bytes_written(), expected_size); + } + + { + auto ia = Utils::MemcpyIArchive{Utils::make_span(buf)}; + ParticleForce out; + + ia >> out; + + BOOST_CHECK_EQUAL(ia.bytes_read(), expected_size); + check_particle_force(out, pf); + } +} + +BOOST_AUTO_TEST_CASE(force_constructors) { + + auto pf = ParticleForce{{1, 2, 3}}; +#ifdef ROTATION + pf.torque = {4, 5, 6}; +#endif + + // check copy constructor + { + ParticleForce out(pf); + check_particle_force(out, pf); + } + + // check copy assignment operator + { + ParticleForce out; // avoid copy elision + out = pf; + check_particle_force(out, pf); + } +} diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu b/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu index 
8aecba41972..9f6dddbf21b 100644 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu +++ b/src/core/virtual_sites/lb_inertialess_tracers_cuda.cu @@ -17,7 +17,6 @@ * along with this program. If not, see . */ -// ******* // This is an internal file of the IMMERSED BOUNDARY implementation // It should not be included by any main ESPResSo routines // Functions to be exported for ESPResSo are in ibm_main.hpp @@ -31,7 +30,7 @@ #include "Particle.hpp" #include "cuda_interface.hpp" -#include "cuda_utils.hpp" +#include "cuda_utils.cuh" #include "grid_based_algorithms/lb_boundaries.hpp" #include "grid_based_algorithms/lbgpu.cuh" #include "grid_based_algorithms/lbgpu.hpp" @@ -40,29 +39,31 @@ #include -// To avoid include of communication.hpp in cuda file +// To avoid including communication.hpp extern int this_node; -// ***** Other functions for internal use ***** -void InitCUDA_IBM(int numParticles); +// Other functions for internal use +void InitCUDA_IBM(std::size_t numParticles); -// ***** Our own global variables ******** +// Our own global variables IBM_CUDA_ParticleDataInput *IBM_ParticleDataInput_device = nullptr; IBM_CUDA_ParticleDataOutput *IBM_ParticleDataOutput_device = nullptr; -int IBM_numParticlesCache = -1; // To detect a change in particle number which - // requires reallocation of memory +bool IBM_initialized = false; +std::size_t IBM_numParticlesCache = 0; // To detect a change in particle number + // which requires reallocation of memory -// ****** These variables are defined in lbgpu_cuda.cu, but we also want them -// here **** +// These variables are defined in lbgpu_cuda.cu, but we also want them here extern LB_node_force_density_gpu node_f; extern LB_nodes_gpu *current_nodes; -// ** These variables are static in lbgpu_cuda.cu, so we need to duplicate them +// These variables are static in lbgpu_cuda.cu, so we need to duplicate them // here. They are initialized in ForcesIntoFluid. The pointers are on the host, // but point into device memory. 
LB_parameters_gpu *para_gpu = nullptr; float *lb_boundary_velocity_IBM = nullptr; +static constexpr unsigned int threads_per_block = 64; + /** @copybrief calc_m_from_n * * This is a re-implementation of @ref calc_m_from_n. It does exactly the @@ -73,60 +74,60 @@ __device__ void Calc_m_from_n_IBM(const LB_nodes_gpu n_a, const LB_parameters_gpu *const paraP) { const LB_parameters_gpu ¶ = *paraP; // mass mode - mode[0] = n_a.vd[0 * para.number_of_nodes + index] + - n_a.vd[1 * para.number_of_nodes + index] + - n_a.vd[2 * para.number_of_nodes + index] + - n_a.vd[3 * para.number_of_nodes + index] + - n_a.vd[4 * para.number_of_nodes + index] + - n_a.vd[5 * para.number_of_nodes + index] + - n_a.vd[6 * para.number_of_nodes + index] + - n_a.vd[7 * para.number_of_nodes + index] + - n_a.vd[8 * para.number_of_nodes + index] + - n_a.vd[9 * para.number_of_nodes + index] + - n_a.vd[10 * para.number_of_nodes + index] + - n_a.vd[11 * para.number_of_nodes + index] + - n_a.vd[12 * para.number_of_nodes + index] + - n_a.vd[13 * para.number_of_nodes + index] + - n_a.vd[14 * para.number_of_nodes + index] + - n_a.vd[15 * para.number_of_nodes + index] + - n_a.vd[16 * para.number_of_nodes + index] + - n_a.vd[17 * para.number_of_nodes + index] + - n_a.vd[18 * para.number_of_nodes + index]; + mode[0] = n_a.populations[0 * para.number_of_nodes + index] + + n_a.populations[1 * para.number_of_nodes + index] + + n_a.populations[2 * para.number_of_nodes + index] + + n_a.populations[3 * para.number_of_nodes + index] + + n_a.populations[4 * para.number_of_nodes + index] + + n_a.populations[5 * para.number_of_nodes + index] + + n_a.populations[6 * para.number_of_nodes + index] + + n_a.populations[7 * para.number_of_nodes + index] + + n_a.populations[8 * para.number_of_nodes + index] + + n_a.populations[9 * para.number_of_nodes + index] + + n_a.populations[10 * para.number_of_nodes + index] + + n_a.populations[11 * para.number_of_nodes + index] + + n_a.populations[12 * para.number_of_nodes + index] + + 
n_a.populations[13 * para.number_of_nodes + index] + + n_a.populations[14 * para.number_of_nodes + index] + + n_a.populations[15 * para.number_of_nodes + index] + + n_a.populations[16 * para.number_of_nodes + index] + + n_a.populations[17 * para.number_of_nodes + index] + + n_a.populations[18 * para.number_of_nodes + index]; // momentum modes - mode[1] = (n_a.vd[1 * para.number_of_nodes + index] - - n_a.vd[2 * para.number_of_nodes + index]) + - (n_a.vd[7 * para.number_of_nodes + index] - - n_a.vd[8 * para.number_of_nodes + index]) + - (n_a.vd[9 * para.number_of_nodes + index] - - n_a.vd[10 * para.number_of_nodes + index]) + - (n_a.vd[11 * para.number_of_nodes + index] - - n_a.vd[12 * para.number_of_nodes + index]) + - (n_a.vd[13 * para.number_of_nodes + index] - - n_a.vd[14 * para.number_of_nodes + index]); - - mode[2] = (n_a.vd[3 * para.number_of_nodes + index] - - n_a.vd[4 * para.number_of_nodes + index]) + - (n_a.vd[7 * para.number_of_nodes + index] - - n_a.vd[8 * para.number_of_nodes + index]) - - (n_a.vd[9 * para.number_of_nodes + index] - - n_a.vd[10 * para.number_of_nodes + index]) + - (n_a.vd[15 * para.number_of_nodes + index] - - n_a.vd[16 * para.number_of_nodes + index]) + - (n_a.vd[17 * para.number_of_nodes + index] - - n_a.vd[18 * para.number_of_nodes + index]); - - mode[3] = (n_a.vd[5 * para.number_of_nodes + index] - - n_a.vd[6 * para.number_of_nodes + index]) + - (n_a.vd[11 * para.number_of_nodes + index] - - n_a.vd[12 * para.number_of_nodes + index]) - - (n_a.vd[13 * para.number_of_nodes + index] - - n_a.vd[14 * para.number_of_nodes + index]) + - (n_a.vd[15 * para.number_of_nodes + index] - - n_a.vd[16 * para.number_of_nodes + index]) - - (n_a.vd[17 * para.number_of_nodes + index] - - n_a.vd[18 * para.number_of_nodes + index]); + mode[1] = (n_a.populations[1 * para.number_of_nodes + index] - + n_a.populations[2 * para.number_of_nodes + index]) + + (n_a.populations[7 * para.number_of_nodes + index] - + n_a.populations[8 * para.number_of_nodes + 
index]) + + (n_a.populations[9 * para.number_of_nodes + index] - + n_a.populations[10 * para.number_of_nodes + index]) + + (n_a.populations[11 * para.number_of_nodes + index] - + n_a.populations[12 * para.number_of_nodes + index]) + + (n_a.populations[13 * para.number_of_nodes + index] - + n_a.populations[14 * para.number_of_nodes + index]); + + mode[2] = (n_a.populations[3 * para.number_of_nodes + index] - + n_a.populations[4 * para.number_of_nodes + index]) + + (n_a.populations[7 * para.number_of_nodes + index] - + n_a.populations[8 * para.number_of_nodes + index]) - + (n_a.populations[9 * para.number_of_nodes + index] - + n_a.populations[10 * para.number_of_nodes + index]) + + (n_a.populations[15 * para.number_of_nodes + index] - + n_a.populations[16 * para.number_of_nodes + index]) + + (n_a.populations[17 * para.number_of_nodes + index] - + n_a.populations[18 * para.number_of_nodes + index]); + + mode[3] = (n_a.populations[5 * para.number_of_nodes + index] - + n_a.populations[6 * para.number_of_nodes + index]) + + (n_a.populations[11 * para.number_of_nodes + index] - + n_a.populations[12 * para.number_of_nodes + index]) - + (n_a.populations[13 * para.number_of_nodes + index] - + n_a.populations[14 * para.number_of_nodes + index]) + + (n_a.populations[15 * para.number_of_nodes + index] - + n_a.populations[16 * para.number_of_nodes + index]) - + (n_a.populations[17 * para.number_of_nodes + index] - + n_a.populations[18 * para.number_of_nodes + index]); } __global__ void @@ -233,8 +234,8 @@ __global__ void ParticleVelocitiesFromLB_Kernel( particles_input[particleIndex].pos[2]}; float v[3] = {0}; - // ***** This part is copied from get_interpolated_velocity - // ***** + we add the force + we consider boundaries + // This part is copied from get_interpolated_velocity + // + we add the force + we consider boundaries float temp_delta[6]; float delta[8]; @@ -361,14 +362,8 @@ __global__ void ResetLBForces_Kernel(LB_node_force_density_gpu node_f, /** Call a kernel to 
reset the forces on the LB nodes to the external force. */ void IBM_ResetLBForces_GPU() { if (this_node == 0) { - // Setup for kernel call - int threads_per_block = 64; - int blocks_per_grid_y = 4; - auto blocks_per_grid_x = - static_cast((lbpar_gpu.number_of_nodes + - threads_per_block * blocks_per_grid_y - 1) / - (threads_per_block * blocks_per_grid_y)); - dim3 dim_grid = make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1); + dim3 dim_grid = + calculate_dim_grid(lbpar_gpu.number_of_nodes, 4, threads_per_block); KERNELCALL(ResetLBForces_Kernel, dim_grid, threads_per_block, node_f, para_gpu); @@ -386,65 +381,56 @@ void IBM_ForcesIntoFluid_GPU(ParticleRange particles) { // (2) Copy forces to the GPU // (3) interpolate on the LBM grid and spread forces - const int numParticles = gpu_get_particle_pointer().size(); + auto const numParticles = gpu_get_particle_pointer().size(); - // Storage only needed on master and allocated only once at the first time - // step if ( IBM_ParticleDataInput_host == nullptr && this_node == 0 ) - if (IBM_ParticleDataInput_host == nullptr || + // Storage only needed on head node + if (IBM_ParticleDataInput_host.empty() || !IBM_initialized || numParticles != IBM_numParticlesCache) InitCUDA_IBM(numParticles); // We gather particle positions and forces from all nodes IBM_cuda_mpi_get_particles(particles); - // ***** GPU stuff only on master ***** + // GPU only on head node if (this_node == 0 && numParticles > 0) { // Copy data to device cuda_safe_mem(cudaMemcpy(IBM_ParticleDataInput_device, - IBM_ParticleDataInput_host, + IBM_ParticleDataInput_host.data(), numParticles * sizeof(IBM_CUDA_ParticleDataInput), cudaMemcpyHostToDevice)); // Kernel call for spreading the forces on the LB grid - int threads_per_block_particles = 64; - int blocks_per_grid_particles_y = 4; - int blocks_per_grid_particles_x = - (numParticles + - threads_per_block_particles * blocks_per_grid_particles_y - 1) / - (threads_per_block_particles * blocks_per_grid_particles_y); 
- dim3 dim_grid_particles = - make_uint3(blocks_per_grid_particles_x, blocks_per_grid_particles_y, 1); - - KERNELCALL(ForcesIntoFluid_Kernel, dim_grid_particles, - threads_per_block_particles, IBM_ParticleDataInput_device, - numParticles, node_f, para_gpu); + dim3 dim_grid = calculate_dim_grid(static_cast(numParticles), 4, + threads_per_block); + KERNELCALL(ForcesIntoFluid_Kernel, dim_grid, threads_per_block, + IBM_ParticleDataInput_device, numParticles, node_f, para_gpu); } } -void InitCUDA_IBM(const int numParticles) { +void InitCUDA_IBM(std::size_t const numParticles) { - if (this_node == 0) // GPU only on master - { + // GPU only on head node + if (this_node == 0) { // Check if we have to delete - if (IBM_ParticleDataInput_host != nullptr) { - delete[] IBM_ParticleDataInput_host; - delete[] IBM_ParticleDataOutput_host; + if (!IBM_ParticleDataInput_host.empty()) { + IBM_ParticleDataInput_host.clear(); + IBM_ParticleDataOutput_host.clear(); cuda_safe_mem(cudaFree(IBM_ParticleDataInput_device)); cuda_safe_mem(cudaFree(IBM_ParticleDataOutput_device)); cuda_safe_mem(cudaFree(lb_boundary_velocity_IBM)); } // Back and forth communication of positions and velocities - IBM_ParticleDataInput_host = new IBM_CUDA_ParticleDataInput[numParticles]; + IBM_ParticleDataInput_host.resize(numParticles); + IBM_ParticleDataOutput_host.resize(numParticles); cuda_safe_mem( cudaMalloc((void **)&IBM_ParticleDataInput_device, numParticles * sizeof(IBM_CUDA_ParticleDataInput))); cuda_safe_mem( cudaMalloc((void **)&IBM_ParticleDataOutput_device, numParticles * sizeof(IBM_CUDA_ParticleDataOutput))); - IBM_ParticleDataOutput_host = new IBM_CUDA_ParticleDataOutput[numParticles]; // Use LB parameters lb_get_para_pointer(¶_gpu); @@ -480,6 +466,7 @@ void InitCUDA_IBM(const int numParticles) { #endif IBM_numParticlesCache = numParticles; + IBM_initialized = true; } } @@ -492,34 +479,26 @@ void ParticleVelocitiesFromLB_GPU(ParticleRange particles) { // (2) transfer velocities back to CPU // (3) 
spread velocities to local cells via MPI - const int numParticles = gpu_get_particle_pointer().size(); + auto const numParticles = gpu_get_particle_pointer().size(); - // **** GPU stuff only on master **** + // GPU only on head node if (this_node == 0 && numParticles > 0) { // Kernel call - int threads_per_block_particles = 64; - int blocks_per_grid_particles_y = 4; - int blocks_per_grid_particles_x = - (numParticles + - threads_per_block_particles * blocks_per_grid_particles_y - 1) / - (threads_per_block_particles * blocks_per_grid_particles_y); - dim3 dim_grid_particles = - make_uint3(blocks_per_grid_particles_x, blocks_per_grid_particles_y, 1); - KERNELCALL(ParticleVelocitiesFromLB_Kernel, dim_grid_particles, - threads_per_block_particles, *current_nodes, - IBM_ParticleDataInput_device, numParticles, + dim3 dim_grid = calculate_dim_grid(static_cast(numParticles), 4, + threads_per_block); + KERNELCALL(ParticleVelocitiesFromLB_Kernel, dim_grid, threads_per_block, + *current_nodes, IBM_ParticleDataInput_device, numParticles, IBM_ParticleDataOutput_device, node_f, lb_boundary_velocity_IBM, para_gpu); // Copy velocities from device to host - cuda_safe_mem(cudaMemcpy(IBM_ParticleDataOutput_host, + cuda_safe_mem(cudaMemcpy(IBM_ParticleDataOutput_host.data(), IBM_ParticleDataOutput_device, numParticles * sizeof(IBM_CUDA_ParticleDataOutput), cudaMemcpyDeviceToHost)); } - // ***** Back to all nodes **** - // Spread using MPI + // Scatter to all nodes IBM_cuda_mpi_send_velocities(particles); } diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp index b8a4c70c9b1..a363e546c26 100644 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp +++ b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.cpp @@ -35,33 +35,32 @@ #include #include +#include + // Variables for communication -IBM_CUDA_ParticleDataInput *IBM_ParticleDataInput_host = nullptr; 
-IBM_CUDA_ParticleDataOutput *IBM_ParticleDataOutput_host = nullptr; +std::vector IBM_ParticleDataInput_host = {}; +std::vector IBM_ParticleDataOutput_host = {}; -namespace { -void pack_particles(ParticleRange particles, - IBM_CUDA_ParticleDataInput *buffer) { - int dummy[3] = {0, 0, 0}; +static void pack_particles(ParticleRange particles, + std::vector &buffer) { int i = 0; for (auto const &part : particles) { - Utils::Vector3d pos = folded_position(part.r.p, box_geo); + auto const pos = folded_position(part.r.p, box_geo); - buffer[i].pos[0] = (float)pos[0]; - buffer[i].pos[1] = (float)pos[1]; - buffer[i].pos[2] = (float)pos[2]; + buffer[i].pos[0] = static_cast(pos[0]); + buffer[i].pos[1] = static_cast(pos[1]); + buffer[i].pos[2] = static_cast(pos[2]); - buffer[i].f[0] = (float)part.f.f[0]; - buffer[i].f[1] = (float)part.f.f[1]; - buffer[i].f[2] = (float)part.f.f[2]; + buffer[i].f[0] = static_cast(part.f.f[0]); + buffer[i].f[1] = static_cast(part.f.f[1]); + buffer[i].f[2] = static_cast(part.f.f[2]); buffer[i].is_virtual = part.p.is_virtual; i++; } } -} // namespace /** Gather particle positions on the master node in order to communicate them * to GPU. 
We transfer all particles (real and virtual), but actually we would @@ -75,30 +74,28 @@ void IBM_cuda_mpi_get_particles(ParticleRange particles) { static std::vector buffer; buffer.resize(n_part); /* pack local parts into buffer */ - pack_particles(particles, buffer.data()); + pack_particles(particles, buffer); - Utils::Mpi::gather_buffer(buffer.data(), buffer.size(), comm_cart); + Utils::Mpi::gather_buffer(buffer, comm_cart); } else { /* Pack own particles */ pack_particles(particles, IBM_ParticleDataInput_host); - Utils::Mpi::gather_buffer(IBM_ParticleDataInput_host, n_part, comm_cart); + Utils::Mpi::gather_buffer(IBM_ParticleDataInput_host, comm_cart); } } -namespace { -void set_velocities(ParticleRange particles, - IBM_CUDA_ParticleDataOutput *buffer) { +static void set_velocities(ParticleRange particles, + std::vector &buffer) { int i = 0; for (auto &part : particles) { - if (part.p.is_virtual) + if (part.p.is_virtual) { for (int j = 0; j < 3; j++) - part.m.v[j] = buffer[i].v[j]; - + part.m.v[j] = static_cast(buffer[i].v[j]); + } i++; } } -} // namespace /** Particle velocities have been communicated from GPU, now transmit to all * nodes. Analogous to @ref cuda_mpi_send_forces. 
@@ -113,10 +110,11 @@ void IBM_cuda_mpi_send_velocities(ParticleRange particles) { Utils::Mpi::scatter_buffer(buffer.data(), n_part, comm_cart); - set_velocities(particles, buffer.data()); + set_velocities(particles, buffer); } else { /* Scatter forces to slaves */ - Utils::Mpi::scatter_buffer(IBM_ParticleDataOutput_host, n_part, comm_cart); + Utils::Mpi::scatter_buffer(IBM_ParticleDataOutput_host.data(), n_part, + comm_cart); set_velocities(particles, IBM_ParticleDataOutput_host); } diff --git a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp index 80e02c6709a..db948444aca 100644 --- a/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp +++ b/src/core/virtual_sites/lb_inertialess_tracers_cuda_interface.hpp @@ -31,6 +31,8 @@ #include "ParticleRange.hpp" +#include + // *********** Communication functions ******** // Implemented in real C++, but called from the ibm_cuda.cu void IBM_cuda_mpi_send_velocities(ParticleRange particles); @@ -50,8 +52,8 @@ typedef struct { } IBM_CUDA_ParticleDataOutput; // ******** global variables for CUDA and MPI communication ****** -extern IBM_CUDA_ParticleDataInput *IBM_ParticleDataInput_host; -extern IBM_CUDA_ParticleDataOutput *IBM_ParticleDataOutput_host; +extern std::vector IBM_ParticleDataInput_host; +extern std::vector IBM_ParticleDataOutput_host; #endif diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 79372bbed62..170f14123bc 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -2,6 +2,17 @@ set(PYTHON_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(PYTHON_FRONTEND ${PYTHON_EXECUTABLE}) +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + execute_process( + COMMAND "${PYTHON_FRONTEND}" "-c" + "import sysconfig; print(sysconfig.get_config_var('exec_prefix'))" + OUTPUT_VARIABLE DARWIN_EXEC_PREFIX OUTPUT_STRIP_TRAILING_WHITESPACE) + set(PYTHON_FRONTEND_TMP + 
"${DARWIN_EXEC_PREFIX}/Resources/Python.app/Contents/MacOS/Python") + if(EXISTS ${PYTHON_FRONTEND_TMP}) + set(PYTHON_FRONTEND ${PYTHON_FRONTEND_TMP}) + endif() +endif() configure_file(pypresso.cmakein ${CMAKE_BINARY_DIR}/pypresso @ONLY) if(IPYTHON_EXECUTABLE) diff --git a/src/python/espressomd/actors.pyx b/src/python/espressomd/actors.pyx index 35a1fceb599..fad742ce4bb 100644 --- a/src/python/espressomd/actors.pyx +++ b/src/python/espressomd/actors.pyx @@ -16,7 +16,7 @@ # along with this program. If not, see . include "myconfig.pxi" from .highlander import ThereCanOnlyBeOne -from .utils cimport handle_errors +from .utils import handle_errors cdef class Actor: @@ -79,7 +79,7 @@ cdef class Actor: if inter in Actor.active_list: if not Actor.active_list[inter]: raise Exception( - "Class not registered in Actor.active_list " + self.__class__.__bases__[0]) + "Class not registered in Actor.active_list: " + self.__class__.__bases__[0].__name__) Actor.active_list[inter] = False def is_valid(self): diff --git a/src/python/espressomd/analyze.pyx b/src/python/espressomd/analyze.pyx index 66343b96c4f..7f3bad50c93 100644 --- a/src/python/espressomd/analyze.pyx +++ b/src/python/espressomd/analyze.pyx @@ -31,9 +31,9 @@ from .globals import Globals from collections import OrderedDict from .system import System -from .utils import array_locked, is_valid_type +from .utils import array_locked, is_valid_type, handle_errors from .utils cimport Vector3i, Vector3d, Vector9d -from .utils cimport handle_errors, check_type_or_throw_except +from .utils cimport check_type_or_throw_except from .utils cimport create_nparray_from_double_array from .particle_data cimport get_n_part diff --git a/src/python/espressomd/cellsystem.pyx b/src/python/espressomd/cellsystem.pyx index 101d73b4d49..ecb96ba253f 100644 --- a/src/python/espressomd/cellsystem.pyx +++ b/src/python/espressomd/cellsystem.pyx @@ -25,7 +25,8 @@ from .globals cimport verlet_reuse, skin from .globals cimport mpi_bcast_parameter from 
libcpp.vector cimport vector from .cellsystem cimport cell_structure -from .utils cimport handle_errors, Vector3i, check_type_or_throw_except +from .utils import handle_errors +from .utils cimport Vector3i, check_type_or_throw_except cdef class CellSystem: diff --git a/src/python/espressomd/collision_detection.pyx b/src/python/espressomd/collision_detection.pyx index 4674f84b789..5797a0647ff 100644 --- a/src/python/espressomd/collision_detection.pyx +++ b/src/python/espressomd/collision_detection.pyx @@ -16,7 +16,7 @@ # along with this program. If not, see . from .script_interface import ScriptInterfaceHelper, script_interface_register from .utils import to_str -from .utils cimport handle_errors +from .utils import handle_errors from .interactions import BondedInteraction, BondedInteractions @@ -117,7 +117,7 @@ class CollisionDetection(ScriptInterfaceHelper): """ - if not ("mode" in kwargs): + if "mode" not in kwargs: raise Exception( "Collision mode must be specified via the mode keyword argument") diff --git a/src/python/espressomd/constraints.py b/src/python/espressomd/constraints.py index d5cdbe5f400..91cb63a8f69 100644 --- a/src/python/espressomd/constraints.py +++ b/src/python/espressomd/constraints.py @@ -342,10 +342,11 @@ class ForceField(_Interpolated): Spacing of the grid points. default_scale : :obj:`float` Scaling factor for particles that have no individual scaling factor. - particle_scales : array_like of (:obj:`int`, :obj:`float`) - A list of tuples of ids and scaling factors. For - particles in the list the interaction is scaled with - their individual scaling factor before it is applied. + particle_scales : :obj:`dict` + A dictionary mapping particle ids to scaling factors. + For these particles, the interaction is scaled with + their individual scaling factor. Other particles are + scaled with the default scaling factor. """ @@ -373,10 +374,11 @@ class PotentialField(_Interpolated): Spacing of the grid points. 
default_scale : :obj:`float` Scaling factor for particles that have no individual scaling factor. - particle_scales : array_like (:obj:`int`, :obj:`float`) - A list of tuples of ids and scaling factors. For - particles in the list the interaction is scaled with - their individual scaling factor before it is applied. + particle_scales : :obj:`dict` + A dictionary mapping particle ids to scaling factors. + For these particles, the interaction is scaled with + their individual scaling factor. Other particles are + scaled with the default scaling factor. """ diff --git a/src/python/espressomd/cuda_init.pxd b/src/python/espressomd/cuda_init.pxd index 34d7a6c05d7..34c3e703475 100644 --- a/src/python/espressomd/cuda_init.pxd +++ b/src/python/espressomd/cuda_init.pxd @@ -16,9 +16,22 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # + +from libcpp.vector cimport vector + cdef extern from "cuda_init.hpp": - int cuda_set_device(int dev) except + - int cuda_get_device() - cdef int cuda_get_n_gpus() - void cuda_get_gpu_name(int dev, char name[64]) -# int getdevicelist(int* devl, char* devname) + cdef struct EspressoGpuDevice: + int id + char name[64] + char proc_name[64] + int node + int compute_capability_major + int compute_capability_minor + size_t total_memory + int n_cores + + void cuda_set_device(int dev) except + + int cuda_get_device() except + + int cuda_get_n_gpus() except + + void cuda_get_gpu_name(int dev, char name[64]) except + + vector[EspressoGpuDevice] cuda_gather_gpus() diff --git a/src/python/espressomd/cuda_init.pyx b/src/python/espressomd/cuda_init.pyx index 7d611bd769f..8e70aab0e4d 100644 --- a/src/python/espressomd/cuda_init.pyx +++ b/src/python/espressomd/cuda_init.pyx @@ -18,6 +18,7 @@ # include "myconfig.pxi" from . cimport cuda_init +from . 
import utils cdef class CudaInitHandle: def __init__(self): @@ -37,12 +38,10 @@ cdef class CudaInitHandle: """ dev = cuda_get_device() - if dev == -1: - raise Exception("cuda device get error") return dev @device.setter - def device(self, int _dev): + def device(self, int dev): """ Specify which device to use. @@ -52,35 +51,72 @@ cdef class CudaInitHandle: Set the device id of the graphics card to use. """ - cuda_set_device(_dev) + cuda_set_device(dev) - IF CUDA == 1: - @property - def device_list(self): + def list_devices(self): """ List devices. Returns ------- - :obj:`list` : + :obj:`dict` : List of available CUDA devices. """ cdef char gpu_name_buffer[4 + 64] + n_gpus = 0 + try: + n_gpus = cuda_get_n_gpus() + except RuntimeError: + pass devices = dict() - for i in range(cuda_get_n_gpus()): - cuda_get_gpu_name(i, gpu_name_buffer) - devices[i] = gpu_name_buffer + for i in range(n_gpus): + try: + cuda_get_gpu_name(i, gpu_name_buffer) + except RuntimeError: + continue + devices[i] = utils.to_str(gpu_name_buffer) return devices - @device_list.setter - def device_list(self, dict _dev_dict): - raise Exception("cuda device list is read only") + def list_devices_properties(self): + """ + List devices with their properties on each host machine. + Returns + ------- + :obj:`dict` : + List of available CUDA devices with their properties. 
+ + """ + cdef vector[EspressoGpuDevice] devices + cdef EspressoGpuDevice dev + try: + devices = cuda_gather_gpus() + except RuntimeError: + pass + resources = dict() + for i in range(devices.size()): + dev = devices[i] + hostname = utils.to_str(dev.proc_name) + if hostname not in resources: + resources[hostname] = {} + resources[hostname][dev.id] = { + 'name': utils.to_str(dev.name), + 'compute_capability': ( + dev.compute_capability_major, + dev.compute_capability_minor + ), + 'cores': dev.n_cores, + 'total_memory': dev.total_memory, + } + return resources IF CUDA: def gpu_available(): - return cuda_get_n_gpus() > 0 + try: + return cuda_get_n_gpus() > 0 + except RuntimeError: + return False ELSE: def gpu_available(): return False diff --git a/src/python/espressomd/electrokinetics.pxd b/src/python/espressomd/electrokinetics.pxd index bdbf7cc34ce..7be18e61e6f 100644 --- a/src/python/espressomd/electrokinetics.pxd +++ b/src/python/espressomd/electrokinetics.pxd @@ -28,53 +28,103 @@ IF ELECTROKINETICS and CUDA: DEF MAX_NUMBER_OF_SPECIES = 10 # EK data struct - ctypedef struct EK_parameters: - float agrid - float time_step - float lb_density - unsigned int dim_x - unsigned int dim_y - unsigned int dim_z - unsigned int number_of_nodes - float viscosity - float bulk_viscosity - float gamma_odd - float gamma_even - float friction - float T - float prefactor - float lb_force_density[3] - unsigned int number_of_species - int reaction_species[3] - float rho_reactant_reservoir - float rho_product0_reservoir - float rho_product1_reservoir - float reaction_ct_rate - float reaction_fraction_0 - float reaction_fraction_1 - float mass_reactant - float mass_product0 - float mass_product1 - int stencil - int number_of_boundary_nodes - float fluctuation_amplitude - bool fluctuations - bool advection - bool fluidcoupling_ideal_contribution - float * charge_potential - ekfloat * j - float * lb_force_density_previous - ekfloat * rho[MAX_NUMBER_OF_SPECIES] - int 
species_index[MAX_NUMBER_OF_SPECIES] - float density[MAX_NUMBER_OF_SPECIES] - float D[MAX_NUMBER_OF_SPECIES] - float d[MAX_NUMBER_OF_SPECIES] - float valency[MAX_NUMBER_OF_SPECIES] - float ext_force_density[3][MAX_NUMBER_OF_SPECIES] - char * node_is_catalyst - bool es_coupling - float * charge_potential_buffer - float * electric_field + IF EK_DEBUG: + ctypedef struct EK_parameters: + float agrid + float time_step + float lb_density + unsigned int dim_x + unsigned int dim_y + unsigned int dim_z + unsigned int number_of_nodes + float viscosity + float bulk_viscosity + float gamma_odd + float gamma_even + float friction + float T + float prefactor + float lb_force_density[3] + unsigned int number_of_species + int reaction_species[3] + float rho_reactant_reservoir + float rho_product0_reservoir + float rho_product1_reservoir + float reaction_ct_rate + float reaction_fraction_0 + float reaction_fraction_1 + float mass_reactant + float mass_product0 + float mass_product1 + int stencil + int number_of_boundary_nodes + float fluctuation_amplitude + bool fluctuations + bool advection + bool fluidcoupling_ideal_contribution + float * charge_potential + ekfloat * j + float * lb_force_density_previous + ekfloat * j_fluc + ekfloat * rho[MAX_NUMBER_OF_SPECIES] + int species_index[MAX_NUMBER_OF_SPECIES] + float density[MAX_NUMBER_OF_SPECIES] + float D[MAX_NUMBER_OF_SPECIES] + float d[MAX_NUMBER_OF_SPECIES] + float valency[MAX_NUMBER_OF_SPECIES] + float ext_force_density[3][MAX_NUMBER_OF_SPECIES] + char * node_is_catalyst + bool es_coupling + float * charge_potential_buffer + float * electric_field + ELSE: + ctypedef struct EK_parameters: + float agrid + float time_step + float lb_density + unsigned int dim_x + unsigned int dim_y + unsigned int dim_z + unsigned int number_of_nodes + float viscosity + float bulk_viscosity + float gamma_odd + float gamma_even + float friction + float T + float prefactor + float lb_force_density[3] + unsigned int number_of_species + int 
reaction_species[3] + float rho_reactant_reservoir + float rho_product0_reservoir + float rho_product1_reservoir + float reaction_ct_rate + float reaction_fraction_0 + float reaction_fraction_1 + float mass_reactant + float mass_product0 + float mass_product1 + int stencil + int number_of_boundary_nodes + float fluctuation_amplitude + bool fluctuations + bool advection + bool fluidcoupling_ideal_contribution + float * charge_potential + ekfloat * j + float * lb_force_density_previous + ekfloat * rho[MAX_NUMBER_OF_SPECIES] + int species_index[MAX_NUMBER_OF_SPECIES] + float density[MAX_NUMBER_OF_SPECIES] + float D[MAX_NUMBER_OF_SPECIES] + float d[MAX_NUMBER_OF_SPECIES] + float valency[MAX_NUMBER_OF_SPECIES] + float ext_force_density[3][MAX_NUMBER_OF_SPECIES] + char * node_is_catalyst + bool es_coupling + float * charge_potential_buffer + float * electric_field cdef extern EK_parameters ek_parameters diff --git a/src/python/espressomd/electrokinetics.pyx b/src/python/espressomd/electrokinetics.pyx index d24e2cf9b13..73edb490835 100644 --- a/src/python/espressomd/electrokinetics.pyx +++ b/src/python/espressomd/electrokinetics.pyx @@ -26,7 +26,7 @@ from . 
import utils import tempfile import shutil from .utils import is_valid_type -from .utils cimport Vector3i, Vector6d, handle_errors +from .utils cimport Vector3i, Vector6d import numpy as np IF ELECTROKINETICS: @@ -366,7 +366,7 @@ IF ELECTROKINETICS: self.node[1] = key[1] self.node[2] = key[2] if not lb_lbnode_is_index_valid(self.node): - raise ValueError("LB node index out of bounds") + raise IndexError("LB node index out of bounds") property potential: def __get__(self): @@ -544,7 +544,7 @@ IF ELECTROKINETICS: self.node[2] = key[2] self.id = id if not lb_lbnode_is_index_valid(self.node): - raise ValueError("LB node index out of bounds") + raise IndexError("LB node index out of bounds") property density: def __set__(self, value): @@ -572,4 +572,4 @@ IF ELECTROKINETICS: self.id, self.node[0], self.node[1], self.node[2], flux) != 0: raise Exception("Species has not been added to EK.") - return np.array(flux[0], flux[1], flux[2]) + return np.array([flux[0], flux[1], flux[2]]) diff --git a/src/python/espressomd/electrostatic_extensions.pxd b/src/python/espressomd/electrostatic_extensions.pxd index cc35493a54f..c119a540e78 100644 --- a/src/python/espressomd/electrostatic_extensions.pxd +++ b/src/python/espressomd/electrostatic_extensions.pxd @@ -20,31 +20,13 @@ include "myconfig.pxi" from .electrostatics cimport * from libcpp.vector cimport vector -from libcpp cimport bool from .utils cimport Vector3d IF ELECTROSTATICS and P3M: - cdef extern from "electrostatics_magnetostatics/elc.hpp": - ctypedef struct ELC_struct: - double maxPWerror - double gap_size - double far_cut - bool neutralize - double delta_mid_top, - double delta_mid_bot, - bool const_pot, - double pot_diff - - int ELC_set_params(double maxPWerror, double min_dist, double far_cut, - bool neutralize, double delta_mid_top, double delta_mid_bot, bool const_pot, double pot_diff) - - # links intern C-struct with python object - ELC_struct elc_params - cdef extern from "electrostatics_magnetostatics/icc.hpp": - 
ctypedef struct iccp3m_struct: - int n_ic + ctypedef struct icc_struct: + int n_icc int num_iteration double eout vector[double] areas @@ -58,7 +40,14 @@ IF ELECTROSTATICS and P3M: int first_id # links intern C-struct with python object - iccp3m_struct iccp3m_cfg + cdef extern icc_struct icc_cfg + + void icc_set_params(int n_icc, double convergence, double relaxation, + Vector3d & ext_field, int max_iterations, + int first_id, double eps_out, + vector[double] & areas, + vector[double] & e_in, + vector[double] & sigma, + vector[Vector3d] & normals) except + - void iccp3m_alloc_lists() - int mpi_iccp3m_init() + void icc_deactivate() diff --git a/src/python/espressomd/electrostatic_extensions.pyx b/src/python/espressomd/electrostatic_extensions.pyx index 78523d23900..6b289e402a1 100644 --- a/src/python/espressomd/electrostatic_extensions.pyx +++ b/src/python/espressomd/electrostatic_extensions.pyx @@ -22,7 +22,9 @@ include "myconfig.pxi" from . cimport actors from . import actors import numpy as np -from .utils cimport handle_errors, check_type_or_throw_except, check_range_or_except +from .utils import handle_errors, array_locked +from .utils cimport check_type_or_throw_except, check_range_or_except, Vector3d, make_Vector3d, make_array_locked, make_array_locked_vector +from libcpp.vector cimport vector IF ELECTROSTATICS and P3M: from espressomd.electrostatics import check_neutrality @@ -30,126 +32,6 @@ IF ELECTROSTATICS and P3M: cdef class ElectrostaticExtensions(actors.Actor): pass - cdef class ELC(ElectrostaticExtensions): - """ - Electrostatics solver for systems with two periodic dimensions. - See :ref:`Electrostatic Layer Correction (ELC)` for more details. - - Parameters - ---------- - gap_size : :obj:`float`, required - The gap size gives the height :math:`h` of the empty region between - the system box and the neighboring artificial images. |es| does not - make sure that the gap is actually empty, this is the user's - responsibility. 
The method will run even if the condition is not - fulfilled, however, the error bound will not be reached. Therefore - you should really make sure that the gap region is empty (e.g. - with wall constraints). - maxPWerror : :obj:`float`, required - The maximal pairwise error sets the least upper bound (LUB) error - of the force between any two charges without prefactors (see the - papers). The algorithm tries to find parameters to meet this LUB - requirements or will throw an error if there are none. - delta_mid_top : :obj:`float`, optional - Dielectric contrast :math:`\\Delta_t` between the upper boundary - and the simulation box. - delta_mid_bottom : :obj:`float`, optional - Dielectric contrast :math:`\\Delta_b` between the lower boundary - and the simulation box. - const_pot : :obj:`bool`, optional - Activate a constant electric potential between the top and bottom - of the simulation box. - pot_diff : :obj:`float`, optional - If ``const_pot`` is enabled, this parameter controls the applied - voltage between the boundaries of the simulation box in the - *z*-direction (at :math:`z = 0` and :math:`z = L_z - h`). - neutralize : :obj:`bool`, optional - By default, *ELC* just as P3M adds a homogeneous neutralizing - background to the system in case of a net charge. However, unlike - in three dimensions, this background adds a parabolic potential - across the slab :cite:`ballenegger09a`. Therefore, under normal - circumstances, you will probably want to disable the neutralization - for non-neutral systems. This corresponds then to a formal - regularization of the forces and energies :cite:`ballenegger09a`. - Also, if you add neutralizing walls explicitly as constraints, you - have to disable the neutralization. When using a dielectric - contrast or full metallic walls (``delta_mid_top != 0`` or - ``delta_mid_bot != 0`` or ``const_pot=True``), ``neutralize`` is - overwritten and switched off internally. 
Note that the special - case of non-neutral systems with a *non-metallic* dielectric jump - (e.g. ``delta_mid_top`` or ``delta_mid_bot`` in ``]-1,1[``) is not - covered by the algorithm and will throw an error. - far_cut : :obj:`float`, optional - Cutoff radius, use with care, intended for testing purposes. When - setting the cutoff directly, the maximal pairwise error is ignored. - """ - - def validate_params(self): - default_params = self.default_params() - check_type_or_throw_except( - self._params["maxPWerror"], 1, float, "") - check_range_or_except( - self._params, "maxPWerror", 0, False, "inf", True) - check_type_or_throw_except(self._params["gap_size"], 1, float, "") - check_range_or_except( - self._params, "gap_size", 0, False, "inf", True) - check_type_or_throw_except(self._params["far_cut"], 1, float, "") - check_type_or_throw_except( - self._params["neutralize"], 1, type(True), "") - - def valid_keys(self): - return ["maxPWerror", "gap_size", "far_cut", "neutralize", - "delta_mid_top", "delta_mid_bot", "const_pot", "pot_diff", - "check_neutrality"] - - def required_keys(self): - return ["maxPWerror", "gap_size"] - - def default_params(self): - return {"maxPWerror": -1, - "gap_size": -1, - "far_cut": -1, - "delta_mid_top": 0, - "delta_mid_bot": 0, - "const_pot": False, - "pot_diff": 0.0, - "neutralize": True, - "check_neutrality": True} - - def _get_params_from_es_core(self): - params = {} - params.update(elc_params) - return params - - def _set_params_in_es_core(self): - if coulomb.method == COULOMB_P3M_GPU: - raise Exception( - "ELC tuning failed, ELC is not set up to work with the GPU P3M") - - if self._params["const_pot"]: - self._params["delta_mid_top"] = -1 - self._params["delta_mid_bot"] = -1 - - if ELC_set_params( - self._params["maxPWerror"], - self._params["gap_size"], - self._params["far_cut"], - self._params["neutralize"], - self._params["delta_mid_top"], - self._params["delta_mid_bot"], - self._params["const_pot"], - 
self._params["pot_diff"]): - handle_errors( - "ELC tuning failed, ELC is not set up to work with the GPU P3M") - - def _activate_method(self): - check_neutrality(self._params) - self._set_params_in_es_core() - - def _deactivate_method(self): - raise Exception( - "Unable to remove ELC as the state of the underlying electrostatics method will remain unclear.") - cdef class ICC(ElectrostaticExtensions): """ Interface to the induced charge calculation scheme for dielectric @@ -180,47 +62,35 @@ IF ELECTROSTATICS and P3M: sigmas : (``n_icc``, ) array_like :obj:`float`, optional Additional surface charge density in the absence of any charge induction. - epsilons : (``n_icc``, ) array_like :obj:`float`, optional + epsilons : (``n_icc``, ) array_like :obj:`float` Dielectric constant associated to the areas. """ def validate_params(self): - default_params = self.default_params() - check_type_or_throw_except(self._params["n_icc"], 1, int, "") - check_range_or_except( - self._params, "n_icc", 1, True, "inf", True) + + check_type_or_throw_except( + self._params["first_id"], 1, int, "") check_type_or_throw_except( self._params["convergence"], 1, float, "") - check_range_or_except( - self._params, "convergence", 0, False, "inf", True) check_type_or_throw_except( self._params["relaxation"], 1, float, "") - check_range_or_except( - self._params, "relaxation", 0, False, "inf", True) check_type_or_throw_except( self._params["ext_field"], 3, float, "") check_type_or_throw_except( self._params["max_iterations"], 1, int, "") - check_range_or_except( - self._params, "max_iterations", 0, False, "inf", True) - - check_type_or_throw_except( - self._params["first_id"], 1, int, "") - check_range_or_except( - self._params, "first_id", 0, True, "inf", True) check_type_or_throw_except( self._params["eps_out"], 1, float, "") n_icc = self._params["n_icc"] + assert n_icc >= 0, "ICC: invalid number of particles" - # Required list input self._params["normals"] = np.array(self._params["normals"]) if 
self._params["normals"].size != n_icc * 3: raise ValueError( @@ -231,18 +101,14 @@ IF ELECTROSTATICS and P3M: check_type_or_throw_except( self._params["areas"], n_icc, float, "Error in area list.") - # Not Required if "sigmas" in self._params.keys(): check_type_or_throw_except( self._params["sigmas"], n_icc, float, "Error in sigma list.") else: self._params["sigmas"] = np.zeros(n_icc) - if "epsilons" in self._params.keys(): - check_type_or_throw_except( - self._params["epsilons"], n_icc, float, "Error in epsilon list.") - else: - self._params["epsilons"] = np.zeros(n_icc) + check_type_or_throw_except( + self._params["epsilons"], n_icc, float, "Error in epsilon list.") def valid_keys(self): return ["n_icc", "convergence", "relaxation", "ext_field", @@ -250,92 +116,68 @@ IF ELECTROSTATICS and P3M: "areas", "sigmas", "epsilons", "check_neutrality"] def required_keys(self): - return ["n_icc", "normals", "areas"] + return ["n_icc", "normals", "areas", "epsilons"] def default_params(self): - return {"n_icc": 0, - "convergence": 1e-3, + return {"convergence": 1e-3, "relaxation": 0.7, "ext_field": [0, 0, 0], "max_iterations": 100, "first_id": 0, - "esp_out": 1, - "normals": [], - "areas": [], - "sigmas": [], - "epsilons": [], + "eps_out": 1, "check_neutrality": True} def _get_params_from_es_core(self): params = {} - params["n_icc"] = iccp3m_cfg.n_ic - - # Fill Lists - normals = [] - areas = [] - sigmas = [] - epsilons = [] - for i in range(iccp3m_cfg.n_ic): - normals.append([iccp3m_cfg.normals[i][0], iccp3m_cfg.normals[ - i][1], iccp3m_cfg.normals[i][2]]) - areas.append(iccp3m_cfg.areas[i]) - epsilons.append(iccp3m_cfg.ein[i]) - sigmas.append(iccp3m_cfg.sigma[i]) - - params["normals"] = normals - params["areas"] = areas - params["epsilons"] = epsilons - params["sigmas"] = sigmas - - params["ext_field"] = [iccp3m_cfg.ext_field[0], - iccp3m_cfg.ext_field[1], iccp3m_cfg.ext_field[2]] - params["first_id"] = iccp3m_cfg.first_id - params["max_iterations"] = 
iccp3m_cfg.num_iteration - params["convergence"] = iccp3m_cfg.convergence - params["relaxation"] = iccp3m_cfg.relax - params["eps_out"] = iccp3m_cfg.eout + params["n_icc"] = icc_cfg.n_icc + params["first_id"] = icc_cfg.first_id + params["max_iterations"] = icc_cfg.num_iteration + params["convergence"] = icc_cfg.convergence + params["relaxation"] = icc_cfg.relax + params["eps_out"] = icc_cfg.eout + params["normals"] = make_array_locked_vector(icc_cfg.normals) + params["areas"] = array_locked(icc_cfg.areas) + params["epsilons"] = array_locked(icc_cfg.ein) + params["sigmas"] = array_locked(icc_cfg.sigma) + params["ext_field"] = make_array_locked(icc_cfg.ext_field) return params def _set_params_in_es_core(self): - # First set number of icc particles - iccp3m_cfg.n_ic = self._params["n_icc"] - # Allocate ICC lists - iccp3m_alloc_lists() - - # Fill Lists - for i in range(iccp3m_cfg.n_ic): - iccp3m_cfg.normals[i][0] = self._params["normals"][i][0] - iccp3m_cfg.normals[i][1] = self._params["normals"][i][1] - iccp3m_cfg.normals[i][2] = self._params["normals"][i][2] - - iccp3m_cfg.areas[i] = self._params["areas"][i] - iccp3m_cfg.ein[i] = self._params["epsilons"][i] - iccp3m_cfg.sigma[i] = self._params["sigmas"][i] - - iccp3m_cfg.ext_field[0] = self._params["ext_field"][0] - iccp3m_cfg.ext_field[1] = self._params["ext_field"][1] - iccp3m_cfg.ext_field[2] = self._params["ext_field"][2] - iccp3m_cfg.first_id = self._params["first_id"] - iccp3m_cfg.num_iteration = self._params["max_iterations"] - iccp3m_cfg.convergence = self._params["convergence"] - iccp3m_cfg.relax = self._params["relaxation"] - iccp3m_cfg.eout = self._params["eps_out"] - - # Broadcasts vars - mpi_iccp3m_init() + cdef Vector3d ext_field = make_Vector3d(self._params["ext_field"]) + cdef vector[double] areas, e_in, sigma + cdef vector[Vector3d] normals + areas.resize(self._params["n_icc"]) + e_in.resize(self._params["n_icc"]) + sigma.resize(self._params["n_icc"]) + normals.resize(self._params["n_icc"]) + + for i 
in range(self._params["n_icc"]): + areas[i] = self._params["areas"][i] + e_in[i] = self._params["epsilons"][i] + sigma[i] = self._params["sigmas"][i] + + for j in range(3): + normals[i][j] = self._params["normals"][i][j] + + icc_set_params(self._params["n_icc"], + self._params["convergence"], + self._params["relaxation"], + ext_field, + self._params["max_iterations"], + self._params["first_id"], + self._params["eps_out"], + areas, + e_in, + sigma, + normals) def _activate_method(self): check_neutrality(self._params) self._set_params_in_es_core() def _deactivate_method(self): - iccp3m_cfg.n_ic = 0 - # Allocate ICC lists - iccp3m_alloc_lists() - - # Broadcasts vars - mpi_iccp3m_init() + icc_deactivate() def last_iterations(self): """ @@ -348,4 +190,4 @@ IF ELECTROSTATICS and P3M: Number of iterations """ - return iccp3m_cfg.citeration + return icc_cfg.citeration diff --git a/src/python/espressomd/electrostatics.pxd b/src/python/espressomd/electrostatics.pxd index b6c1452a41c..7f9d5e90651 100644 --- a/src/python/espressomd/electrostatics.pxd +++ b/src/python/espressomd/electrostatics.pxd @@ -18,8 +18,7 @@ # include "myconfig.pxi" -from .utils import is_valid_type, to_str, handle_errors -from .utils cimport handle_errors +from .utils import is_valid_type, to_str from libcpp cimport bool cdef extern from "SystemInterface.hpp": @@ -68,10 +67,10 @@ IF ELECTROSTATICS: from p3m_common cimport P3MParameters cdef extern from "electrostatics_magnetostatics/p3m.hpp": - int p3m_set_params(double r_cut, int * mesh, int cao, double alpha, double accuracy) - void p3m_set_tune_params(double r_cut, int mesh[3], int cao, double alpha, double accuracy) - int p3m_set_mesh_offset(double x, double y, double z) - int p3m_set_eps(double eps) + void p3m_set_params(double r_cut, int * mesh, int cao, double alpha, double accuracy) except + + void p3m_set_tune_params(double r_cut, int mesh[3], int cao, double accuracy) + void p3m_set_mesh_offset(double x, double y, double z) except + + void 
p3m_set_eps(double eps) int p3m_adaptive_tune(bool verbose) ctypedef struct p3m_data_struct: @@ -82,75 +81,25 @@ IF ELECTROSTATICS: IF CUDA: cdef extern from "electrostatics_magnetostatics/p3m_gpu.hpp": - void p3m_gpu_init(int cao, int * mesh, double alpha) - - cdef inline python_p3m_gpu_init(params): - cdef int cao - cdef int mesh[3] - cdef double alpha - cao = params["cao"] - # Mesh can be specified as single int, but here, an array is - # needed - if not hasattr(params["mesh"], "__getitem__"): - for i in range(3): - mesh[i] = params["mesh"] - else: - mesh = params["mesh"] - alpha = params["alpha"] - p3m_gpu_init(cao, mesh, alpha) - handle_errors("python_p3m_gpu_init") - - cdef inline python_p3m_set_mesh_offset(mesh_off): - cdef double mesh_offset[3] - mesh_offset[0] = mesh_off[0] - mesh_offset[1] = mesh_off[1] - mesh_offset[2] = mesh_off[2] - return p3m_set_mesh_offset( - mesh_offset[0], mesh_offset[1], mesh_offset[2]) - - cdef inline python_p3m_adaptive_tune(bool verbose): - cdef int response = p3m_adaptive_tune(verbose) - if response: - handle_errors("python_p3m_adaptive_tune") - - cdef inline python_p3m_set_params(p_r_cut, p_mesh, p_cao, p_alpha, p_accuracy): - cdef int mesh[3] - cdef double r_cut - cdef int cao - cdef double alpha - cdef double accuracy - r_cut = p_r_cut - cao = p_cao - alpha = p_alpha - accuracy = p_accuracy - if is_valid_type(p_mesh, int): - mesh[0] = p_mesh - mesh[1] = p_mesh - mesh[2] = p_mesh - else: - mesh = p_mesh - - return p3m_set_params(r_cut, mesh, cao, alpha, accuracy) - - cdef inline python_p3m_set_tune_params(p_r_cut, p_mesh, p_cao, p_alpha, p_accuracy): - cdef int mesh[3] - cdef double r_cut - cdef int cao - cdef double alpha - cdef double accuracy - r_cut = p_r_cut - cao = p_cao - alpha = p_alpha - accuracy = p_accuracy - - if is_valid_type(p_mesh, int): - mesh[0] = p_mesh - mesh[1] = p_mesh - mesh[2] = p_mesh - else: - mesh = p_mesh - - p3m_set_tune_params(r_cut, mesh, cao, alpha, accuracy) + void p3m_gpu_init(int cao, int * 
mesh, double alpha) except + + + cdef extern from "electrostatics_magnetostatics/elc.hpp": + ctypedef struct ELC_struct: + double maxPWerror + double gap_size + double far_cut + bool neutralize + double delta_mid_top + double delta_mid_bot + bool const_pot + double pot_diff + + int ELC_set_params(double maxPWerror, double min_dist, double far_cut, + bool neutralize, double delta_mid_top, + double delta_mid_bot, bool const_pot, double pot_diff) + + # links intern C-struct with python object + ELC_struct elc_params cdef extern from "electrostatics_magnetostatics/debye_hueckel.hpp": ctypedef struct Debye_hueckel_params: @@ -186,15 +135,6 @@ IF ELECTROSTATICS: int MMM1D_init() int mmm1d_tune(bool verbose) - cdef inline pyMMM1D_tune(bool verbose): - cdef int resp - resp = MMM1D_init() - if resp: - handle_errors("pyMMM1D_tune") - resp = mmm1d_tune(verbose) - if resp: - handle_errors("pyMMM1D_tune") - IF ELECTROSTATICS and MMM1D_GPU: cdef extern from "actor/Mmm1dgpuForce.hpp": diff --git a/src/python/espressomd/electrostatics.pyx b/src/python/espressomd/electrostatics.pyx index 0e9b1491be0..f1af5f2dcbe 100644 --- a/src/python/espressomd/electrostatics.pyx +++ b/src/python/espressomd/electrostatics.pyx @@ -24,11 +24,12 @@ import numpy as np IF SCAFACOS == 1: from .scafacos import ScafacosConnector from . cimport scafacos -from .utils cimport handle_errors -from .utils import is_valid_type, check_type_or_throw_except, to_str +from .utils import is_valid_type, check_type_or_throw_except, to_str, handle_errors +from .utils cimport check_range_or_except from . cimport checks from .analyze cimport partCfg, PartCfg from .particle_data cimport particle +import sys IF ELECTROSTATICS == 1: @@ -192,92 +193,21 @@ IF ELECTROSTATICS: IF P3M == 1: - cdef class P3M(ElectrostaticInteraction): - """ - P3M electrostatics solver. - - Particle--Particle--Particle--Mesh (P3M) is a Fourier-based Ewald - summation method to calculate potentials in N-body simulation. 
- See :ref:`Coulomb P3M` for more details. - - Parameters - ---------- - prefactor : :obj:`float` - Electrostatics prefactor (see :eq:`coulomb_prefactor`). - accuracy : :obj:`float` - P3M tunes its parameters to provide this target accuracy. - alpha : :obj:`float`, optional - The Ewald parameter. - cao : :obj:`float`, optional - The charge-assignment order, an integer between 0 and 7. - epsilon : :obj:`float` or :obj:`str`, optional - A positive number for the dielectric constant of the - surrounding medium. Use ``'metallic'`` to set the dielectric - constant of the surrounding medium to infinity (default). - mesh : :obj:`int` or (3,) array_like of :obj:`int`, optional - The number of mesh points in x, y and z direction. Use a single - value for cubic boxes. - r_cut : :obj:`float`, optional - The real space cutoff. - tune : :obj:`bool`, optional - Used to activate/deactivate the tuning method on activation. - Defaults to ``True``. - check_neutrality : :obj:`bool`, optional - Raise a warning if the system is not electrically neutral when - set to ``True`` (default). 
- - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + cdef class _P3MBase(ElectrostaticInteraction): - def validate_params(self): - default_params = self.default_params() - if not (self._params["prefactor"] > 0.0): - raise ValueError("prefactor should be a positive float") - - if not (self._params["r_cut"] >= 0 - or self._params["r_cut"] == default_params["r_cut"]): - raise ValueError("P3M r_cut has to be >=0") - - if is_valid_type(self._params["mesh"], int): - if self._params["mesh"] % 2 != 0 and self._params["mesh"] != -1: - raise ValueError( - "P3M requires an even number of mesh points in all directions") + cdef _check_and_copy_mesh_size(self, int mesh[3], pmesh): + if is_valid_type(pmesh, int): + pmesh = 3 * [pmesh] else: - check_type_or_throw_except(self._params["mesh"], 3, int, - "P3M mesh has to be an integer or integer list of length 3") - if (self._params["mesh"][0] % 2 != 0 and self._params["mesh"][0] != -1) or \ - (self._params["mesh"][1] % 2 != 0 and self._params["mesh"][1] != -1) or \ - (self._params["mesh"][2] % 2 != 0 and self._params["mesh"][2] != -1): - raise ValueError( - "P3M requires an even number of mesh points in all directions") - - if not (self._params["cao"] >= -1 and self._params["cao"] <= 7): - raise ValueError( - "P3M cao has to be an integer between -1 and 7") - - if self._params["tune"] and not (self._params["accuracy"] >= 0): - raise ValueError("P3M accuracy has to be positive") - - if self._params["epsilon"] == "metallic": - self._params["epsilon"] = 0.0 - - check_type_or_throw_except( - self._params["epsilon"], 1, float, - "epsilon should be a double or 'metallic'") - - if self._params["mesh_off"] != default_params["mesh_off"]: - check_type_or_throw_except(self._params["mesh_off"], 3, float, - "mesh_off should be a (3,) array_like of values between 0.0 and 1.0") - - if not (self._params["alpha"] == default_params["alpha"] - or self._params["alpha"] > 0): - raise ValueError("alpha should be 
positive") + check_type_or_throw_except( + pmesh, 3, int, "mesh size must be 3 ints") + for i in range(3): + mesh[i] = pmesh[i] def valid_keys(self): return ["mesh", "cao", "accuracy", "epsilon", "alpha", "r_cut", - "prefactor", "tune", "check_neutrality", "verbose"] + "prefactor", "tune", "check_neutrality", "verbose", + "mesh_off"] def required_keys(self): return ["prefactor", "accuracy"] @@ -301,19 +231,18 @@ IF P3M == 1: params["tune"] = self._params["tune"] return params - def _set_params_in_es_core(self): - # Sets lb, bcast, resets vars to zero if lb=0 + def _tune(self): + cdef int mesh[3] + self._check_and_copy_mesh_size(mesh, self._params["mesh"]) + set_prefactor(self._params["prefactor"]) - # Sets cdef vars and calls p3m_set_params() in core - python_p3m_set_params(self._params["r_cut"], - self._params["mesh"], self._params["cao"], - self._params["alpha"], self._params["accuracy"]) - # p3m_set_params() -> set r_cuts, mesh, cao, validates sanity, bcasts - # Careful: bcast calls on_coulomb_change(), which calls p3m_init(), - # which resets r_cut if lb is zero. OK. 
- # Sets eps, bcast p3m_set_eps(self._params["epsilon"]) - python_p3m_set_mesh_offset(self._params["mesh_off"]) + p3m_set_tune_params(self._params["r_cut"], mesh, + self._params["cao"], self._params["accuracy"]) + tuning_error = p3m_adaptive_tune(self._params["verbose"]) + if tuning_error: + handle_errors("P3M: tuning failed") + self._params.update(self._get_params_from_es_core()) def tune(self, **tune_params_subset): # update the three necessary parameters if not provided by the user @@ -324,25 +253,97 @@ IF P3M == 1: super().tune(**tune_params_subset) - def _tune(self): + def _set_params_in_es_core(self): + cdef int mesh[3] + self._check_and_copy_mesh_size(mesh, self._params["mesh"]) + set_prefactor(self._params["prefactor"]) + # Sets p3m parameters + # p3m_set_params() -> set parameters and bcasts + # Careful: calls on_coulomb_change(), which calls p3m_init(), + # which resets r_cut if prefactor=0 + p3m_set_params(self._params["r_cut"], mesh, self._params["cao"], + self._params["alpha"], self._params["accuracy"]) + # Sets eps, bcast p3m_set_eps(self._params["epsilon"]) - python_p3m_set_tune_params(self._params["r_cut"], - self._params["mesh"], - self._params["cao"], - -1.0, - self._params["accuracy"]) - python_p3m_adaptive_tune(self._params["verbose"]) - self._params.update(self._get_params_from_es_core()) + p3m_set_mesh_offset(self._params["mesh_off"][0], + self._params["mesh_off"][1], + self._params["mesh_off"][2]) + + def validate_params(self): + default_params = self.default_params() + if not (self._params["prefactor"] > 0.0): + raise ValueError("prefactor should be a positive float") + + if is_valid_type(self._params["mesh"], int): + if self._params["mesh"] % 2 != 0 and self._params["mesh"] != -1: + raise ValueError( + "P3M requires an even number of mesh points in all directions") + else: + check_type_or_throw_except(self._params["mesh"], 3, int, + "P3M mesh has to be an integer or integer list of length 3") + if (self._params["mesh"][0] % 2 != 0 and 
self._params["mesh"][0] != -1) or \ + (self._params["mesh"][1] % 2 != 0 and self._params["mesh"][1] != -1) or \ + (self._params["mesh"][2] % 2 != 0 and self._params["mesh"][2] != -1): + raise ValueError( + "P3M requires an even number of mesh points in all directions") + + if self._params["epsilon"] == "metallic": + self._params["epsilon"] = 0.0 + + check_type_or_throw_except( + self._params["epsilon"], 1, float, + "epsilon should be a double or 'metallic'") + + if self._params["mesh_off"] != default_params["mesh_off"]: + check_type_or_throw_except(self._params["mesh_off"], 3, float, + "mesh_off should be a (3,) array_like of values between 0.0 and 1.0") + + cdef class P3M(_P3MBase): + """ + P3M electrostatics solver. + + Particle--Particle--Particle--Mesh (P3M) is a Fourier-based Ewald + summation method to calculate potentials in N-body simulation. + See :ref:`Coulomb P3M` for more details. + + Parameters + ---------- + prefactor : :obj:`float` + Electrostatics prefactor (see :eq:`coulomb_prefactor`). + accuracy : :obj:`float` + P3M tunes its parameters to provide this target accuracy. + alpha : :obj:`float`, optional + The Ewald parameter. + cao : :obj:`int`, optional + The charge-assignment order, an integer between 0 and 7. + epsilon : :obj:`float` or :obj:`str`, optional + A positive number for the dielectric constant of the + surrounding medium. Use ``'metallic'`` to set the dielectric + constant of the surrounding medium to infinity (default). + mesh : :obj:`int` or (3,) array_like of :obj:`int`, optional + The number of mesh points in x, y and z direction. Use a single + value for cubic boxes. + r_cut : :obj:`float`, optional + The real space cutoff. + tune : :obj:`bool`, optional + Used to activate/deactivate the tuning method on activation. + Defaults to ``True``. + check_neutrality : :obj:`bool`, optional + Raise a warning if the system is not electrically neutral when + set to ``True`` (default).
+ + """ def _activate_method(self): check_neutrality(self._params) if self._params["tune"]: self._tune() self._set_params_in_es_core() + handle_errors("P3M: initialization failed") IF CUDA: - cdef class P3MGPU(ElectrostaticInteraction): + cdef class P3MGPU(_P3MBase): """ P3M electrostatics solver with GPU support. @@ -378,114 +379,158 @@ IF P3M == 1: """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def _activate_method(self): + cdef int mesh[3] + self._check_and_copy_mesh_size(mesh, self._params["mesh"]) - def validate_params(self): - default_params = self.default_params() + check_neutrality(self._params) + p3m_gpu_init(self._params["cao"], mesh, self._params["alpha"]) + handle_errors("P3M: tuning failed") + coulomb.method = COULOMB_P3M_GPU + if self._params["tune"]: + self._tune() + p3m_gpu_init(self._params["cao"], mesh, self._params["alpha"]) + handle_errors("P3M: tuning failed") + self._set_params_in_es_core() - if not (self._params["r_cut"] >= 0 - or self._params["r_cut"] == default_params["r_cut"]): - raise ValueError("P3M r_cut has to be >=0") + def _set_params_in_es_core(self): + super()._set_params_in_es_core() + handle_errors("P3M: initialization failed") - if is_valid_type(self._params["mesh"], int): - if self._params["mesh"] % 2 != 0 and self._params["mesh"] != -1: - raise ValueError( - "P3M requires an even number of mesh points in all directions") - else: - check_type_or_throw_except(self._params["mesh"], 3, int, - "P3M mesh has to be an integer or integer list of length 3") - if (self._params["mesh"][0] % 2 != 0 and self._params["mesh"][0] != -1) or \ - (self._params["mesh"][1] % 2 != 0 and self._params["mesh"][1] != -1) or \ - (self._params["mesh"][2] % 2 != 0 and self._params["mesh"][2] != -1): - raise ValueError( - "P3M requires an even number of mesh points in all directions") - - if not (self._params["cao"] >= -1 - and self._params["cao"] <= 7): - raise ValueError( - "P3M cao has to be an integer between -1 and 
7") + cdef class ELC(ElectrostaticInteraction): + """ + Electrostatics solver for systems with two periodic dimensions. + See :ref:`Electrostatic Layer Correction (ELC)` for more details. - if not (self._params["accuracy"] >= 0): - raise ValueError("P3M accuracy has to be positive") + Parameters + ---------- + p3m_actor : :obj:`P3M`, required + Base P3M actor. + gap_size : :obj:`float`, required + The gap size gives the height :math:`h` of the empty region between + the system box and the neighboring artificial images. |es| checks + that the gap is empty and will throw an error if it isn't. Therefore + you should really make sure that the gap region is empty (e.g. + with wall constraints). + maxPWerror : :obj:`float`, required + The maximal pairwise error sets the least upper bound (LUB) error + of the force between any two charges without prefactors (see the + papers). The algorithm tries to find parameters to meet this LUB + requirement or will throw an error if there are none. + delta_mid_top : :obj:`float`, optional + Dielectric contrast :math:`\\Delta_t` between the upper boundary + and the simulation box. + delta_mid_bot : :obj:`float`, optional + Dielectric contrast :math:`\\Delta_b` between the lower boundary + and the simulation box. + const_pot : :obj:`bool`, optional + Activate a constant electric potential between the top and bottom + of the simulation box. + pot_diff : :obj:`float`, optional + If ``const_pot`` is enabled, this parameter controls the applied + voltage between the boundaries of the simulation box in the + *z*-direction (at :math:`z = 0` and :math:`z = L_z - h`). + neutralize : :obj:`bool`, optional + By default, *ELC* just as P3M adds a homogeneous neutralizing + background to the system in case of a net charge. However, unlike + in three dimensions, this background adds a parabolic potential + across the slab :cite:`ballenegger09a`.
Therefore, under normal + circumstances, you will probably want to disable the neutralization + for non-neutral systems. This corresponds then to a formal + regularization of the forces and energies :cite:`ballenegger09a`. + Also, if you add neutralizing walls explicitly as constraints, you + have to disable the neutralization. When using a dielectric + contrast or full metallic walls (``delta_mid_top != 0`` or + ``delta_mid_bot != 0`` or ``const_pot=True``), ``neutralize`` is + overwritten and switched off internally. Note that the special + case of non-neutral systems with a *non-metallic* dielectric jump + (e.g. ``delta_mid_top`` or ``delta_mid_bot`` in ``]-1,1[``) is not + covered by the algorithm and will throw an error. + far_cut : :obj:`float`, optional + Cutoff radius, use with care, intended for testing purposes. When + setting the cutoff directly, the maximal pairwise error is ignored. + """ - if self._params["epsilon"] == "metallic": - self._params["epsilon"] = 0.0 + def validate_params(self): + # P3M + if CUDA: + if isinstance(self._params["p3m_actor"], P3MGPU): + raise ValueError( + "ELC is not set up to work with the GPU P3M") + check_type_or_throw_except( + self._params["p3m_actor"], 1, getattr( + sys.modules[__name__], "P3M"), + "p3m_actor has to be a P3M solver") + self._params["p3m_actor"]._params["epsilon"] = 0.0 + self._params["p3m_actor"].validate_params() + # ELC + check_type_or_throw_except( + self._params["maxPWerror"], 1, float, + "maxPWerror has to be a float") + check_range_or_except( + self._params, "maxPWerror", 0, False, "inf", True) + check_type_or_throw_except(self._params["gap_size"], 1, float, + "gap_size has to be a float") + check_range_or_except( + self._params, "gap_size", 0, False, "inf", True) + check_type_or_throw_except(self._params["far_cut"], 1, float, + "far_cut has to be a float") + check_type_or_throw_except( + self._params["neutralize"], 1, type(True), + "neutralize has to be a bool") - check_type_or_throw_except( - 
self._params["epsilon"], 1, float, - "epsilon should be a double or 'metallic'") + def valid_keys(self): + return ["p3m_actor", "maxPWerror", "gap_size", "far_cut", + "neutralize", "delta_mid_top", "delta_mid_bot", + "const_pot", "pot_diff", "check_neutrality"] - if self._params["mesh_off"] != default_params["mesh_off"]: - check_type_or_throw_except(self._params["mesh_off"], 3, float, - "mesh_off should be a (3,) array_like of values between 0.0 and 1.0") + def required_keys(self): + return ["p3m_actor", "maxPWerror", "gap_size"] - def valid_keys(self): - return ["mesh", "cao", "accuracy", "epsilon", "alpha", "r_cut", - "prefactor", "tune", "check_neutrality", "verbose"] + def default_params(self): + return {"maxPWerror": -1, + "gap_size": -1, + "far_cut": -1, + "delta_mid_top": 0, + "delta_mid_bot": 0, + "const_pot": False, + "pot_diff": 0.0, + "neutralize": True, + "check_neutrality": True} - def required_keys(self): - return ["prefactor", "accuracy"] + def _get_params_from_es_core(self): + params = {} + params.update(elc_params) + params["p3m_actor"] = self._params["p3m_actor"] + return params - def default_params(self): - return {"cao": 0, - "r_cut": -1, - "alpha": 0, - "accuracy": 0, - "mesh": [0, 0, 0], - "epsilon": 0.0, - "mesh_off": [-1, -1, -1], - "tune": True, - "check_neutrality": True, - "verbose": True} - - def _get_params_from_es_core(self): - params = {} - params.update(p3m.params) - params["prefactor"] = coulomb.prefactor - params["tune"] = self._params["tune"] - return params - - def tune(self, **tune_params_subset): - # update the three necessary parameters if not provided by the - # user - default_params = self.default_params() - for key in ["r_cut", "mesh", "cao"]: - if key not in tune_params_subset: - tune_params_subset[key] = default_params[key] - - super().tune(**tune_params_subset) - - def _tune(self): - set_prefactor(self._params["prefactor"]) - p3m_set_eps(self._params["epsilon"]) - python_p3m_set_tune_params(self._params["r_cut"], - 
self._params["mesh"], - self._params["cao"], - -1.0, - self._params["accuracy"]) - python_p3m_adaptive_tune(self._params["verbose"]) - self._params.update(self._get_params_from_es_core()) + def _set_params_in_es_core(self): + self._params["p3m_actor"]._set_params_in_es_core() + if coulomb.method == COULOMB_P3M_GPU: + raise Exception("ELC is not set up to work with the GPU P3M") + + if self._params["const_pot"]: + self._params["delta_mid_top"] = -1 + self._params["delta_mid_bot"] = -1 + + if ELC_set_params( + self._params["maxPWerror"], + self._params["gap_size"], + self._params["far_cut"], + self._params["neutralize"], + self._params["delta_mid_top"], + self._params["delta_mid_bot"], + self._params["const_pot"], + self._params["pot_diff"]): + handle_errors("ELC tuning failed") - def _activate_method(self): - check_neutrality(self._params) - python_p3m_gpu_init(self._params) - coulomb.method = COULOMB_P3M_GPU - if self._params["tune"]: - self._tune() - python_p3m_gpu_init(self._params) - self._set_params_in_es_core() + def tune(self, **tune_params_subset): + self._params["p3m_actor"].tune(**tune_params_subset) - def _set_params_in_es_core(self): - set_prefactor(self._params["prefactor"]) - python_p3m_set_params(self._params["r_cut"], - self._params["mesh"], - self._params["cao"], - self._params["alpha"], - self._params["accuracy"]) - p3m_set_eps(self._params["epsilon"]) - python_p3m_set_mesh_offset(self._params["mesh_off"]) - handle_errors("p3m gpu init") + def _activate_method(self): + self._params["p3m_actor"]._activate_method() + check_neutrality(self._params) + self._set_params_in_es_core() IF ELECTROSTATICS: cdef class MMM1D(ElectrostaticInteraction): @@ -550,8 +595,12 @@ IF ELECTROSTATICS: self._params["far_switch_radius"], self._params["maxPWerror"]) def _tune(self): - cdef int resp - pyMMM1D_tune(self._params["verbose"]) + resp = MMM1D_init() + if resp: + handle_errors("MMM1D: initialization failed") + resp = mmm1d_tune(self._params["verbose"]) + if resp: + 
handle_errors("MMM1D: tuning failed") self._params.update(self._get_params_from_es_core()) def _activate_method(self): diff --git a/src/python/espressomd/globals.pyx b/src/python/espressomd/globals.pyx index 20ca445af16..3321d27e9cb 100644 --- a/src/python/espressomd/globals.pyx +++ b/src/python/espressomd/globals.pyx @@ -24,8 +24,8 @@ from .globals cimport sim_time from .globals cimport timing_samples from .globals cimport forcecap_set from .globals cimport forcecap_get -from .utils import array_locked -from .utils cimport Vector3d, make_array_locked, handle_errors +from .utils import array_locked, handle_errors +from .utils cimport Vector3d, make_array_locked cdef class Globals: property box_l: diff --git a/src/python/espressomd/integrate.pxd b/src/python/espressomd/integrate.pxd index 3869f0dfcc5..4b290011588 100644 --- a/src/python/espressomd/integrate.pxd +++ b/src/python/espressomd/integrate.pxd @@ -42,18 +42,12 @@ IF NPT: cbool xdir_rescale, cbool ydir_rescale, cbool zdir_rescale, cbool cubic_box) except + -cdef extern from "stokesian_dynamics/sd_interface.hpp": - IF STOKESIAN_DYNAMICS: +IF STOKESIAN_DYNAMICS: + cdef extern from "stokesian_dynamics/sd_interface.hpp": void set_sd_viscosity(double eta) except + - double get_sd_viscosity() - void set_sd_radius_dict(const unordered_map[int, double] & radius_dict) except + - unordered_map[int, double] get_sd_radius_dict() - void set_sd_flags(int flg) - int get_sd_flags() -IF STOKESIAN_DYNAMICS: cpdef enum flags: NONE = 0, SELF_MOBILITY = 1 << 0, diff --git a/src/python/espressomd/integrate.pyx b/src/python/espressomd/integrate.pyx index 7f25708d82e..e0abd70ab91 100644 --- a/src/python/espressomd/integrate.pyx +++ b/src/python/espressomd/integrate.pyx @@ -18,8 +18,8 @@ # from cpython.exc cimport PyErr_CheckSignals, PyErr_SetInterrupt include "myconfig.pxi" -from .utils cimport handle_errors, check_type_or_throw_except -from .utils import to_char_pointer +from .utils cimport check_type_or_throw_except +from .utils 
import to_char_pointer, handle_errors from . cimport integrate cdef class IntegratorHandle: diff --git a/src/python/espressomd/magnetostatic_extensions.pyx b/src/python/espressomd/magnetostatic_extensions.pyx index 872504df0b9..e42eb695a05 100644 --- a/src/python/espressomd/magnetostatic_extensions.pyx +++ b/src/python/espressomd/magnetostatic_extensions.pyx @@ -20,7 +20,8 @@ from . cimport utils include "myconfig.pxi" from .actors import Actor -from .utils cimport handle_errors, check_range_or_except, check_type_or_throw_except +from .utils import handle_errors +from .utils cimport check_range_or_except, check_type_or_throw_except IF DIPOLES and DP3M: class MagnetostaticExtension(Actor): @@ -41,8 +42,11 @@ IF DIPOLES and DP3M: Parameters ---------- gap_size : :obj:`float` - Size of the empty gap. Note that DLC relies on the user to make - sure that this condition is fulfilled. + The gap size gives the height :math:`h` of the empty region between + the system box and the neighboring artificial images. |es| checks + that the gap is empty and will throw an error if it isn't. Therefore + you should really make sure that the gap region is empty (e.g. + with wall constraints). maxPWerror : :obj:`float` Maximal pairwise error of the potential and force. 
far_cut : :obj:`float`, optional @@ -56,13 +60,18 @@ IF DIPOLES and DP3M: """ default_params = self.default_params() check_type_or_throw_except( - self._params["maxPWerror"], 1, float, "") + self._params["maxPWerror"], 1, float, + "maxPWerror has to be a float") check_range_or_except( self._params, "maxPWerror", 0, False, "inf", True) - check_type_or_throw_except(self._params["gap_size"], 1, float, "") + check_type_or_throw_except( + self._params["gap_size"], 1, float, + "gap_size has to be a float") check_range_or_except( self._params, "gap_size", 0, False, "inf", True) - check_type_or_throw_except(self._params["far_cut"], 1, float, "") + check_type_or_throw_except( + self._params["far_cut"], 1, float, + "far_cut has to be a float") def valid_keys(self): return ["maxPWerror", "gap_size", "far_cut"] diff --git a/src/python/espressomd/magnetostatics.pxd b/src/python/espressomd/magnetostatics.pxd index 166f0cf3307..c59d9979bbb 100644 --- a/src/python/espressomd/magnetostatics.pxd +++ b/src/python/espressomd/magnetostatics.pxd @@ -16,7 +16,6 @@ # along with this program. If not, see . 
from libcpp cimport bool -from .utils cimport handle_errors include "myconfig.pxi" @@ -63,19 +62,14 @@ IF DP3M == 1: from p3m_common cimport P3MParameters cdef extern from "electrostatics_magnetostatics/p3m-dipolar.hpp": - int dp3m_set_params(double r_cut, int mesh, int cao, double alpha, double accuracy) - void dp3m_set_tune_params(double r_cut, int mesh, int cao, double alpha, double accuracy) - int dp3m_set_mesh_offset(double x, double y, double z) - int dp3m_set_eps(double eps) + void dp3m_set_params(double r_cut, int mesh, int cao, double alpha, double accuracy) except + + void dp3m_set_tune_params(double r_cut, int mesh, int cao, double accuracy) + void dp3m_set_mesh_offset(double x, double y, double z) except + + void dp3m_set_eps(double eps) int dp3m_adaptive_tune(bool verbose) - int dp3m_deactivate() + void dp3m_deactivate() ctypedef struct dp3m_data_struct: P3MParameters params cdef extern dp3m_data_struct dp3m - - cdef inline python_dp3m_adaptive_tune(bool verbose): - cdef int response = dp3m_adaptive_tune(verbose) - if response: - handle_errors("python_dp3m_adaptive_tune") diff --git a/src/python/espressomd/magnetostatics.pyx b/src/python/espressomd/magnetostatics.pyx index 086748acc2f..5da552948aa 100644 --- a/src/python/espressomd/magnetostatics.pyx +++ b/src/python/espressomd/magnetostatics.pyx @@ -23,7 +23,7 @@ IF SCAFACOS == 1: from .scafacos import ScafacosConnector from . 
cimport scafacos -from .utils cimport handle_errors +from .utils import handle_errors from .utils import is_valid_type, check_type_or_throw_except, to_str IF DIPOLES == 1: @@ -96,10 +96,6 @@ IF DP3M == 1: super().validate_params() default_params = self.default_params() - if not (self._params["r_cut"] >= 0 - or self._params["r_cut"] == default_params["r_cut"]): - raise ValueError("P3M r_cut has to be >=0") - if is_valid_type(self._params["mesh"], int): pass else: @@ -110,13 +106,6 @@ IF DP3M == 1: raise ValueError( "DipolarP3M requires a cubic box") - if not (self._params["cao"] >= -1 and self._params["cao"] <= 7): - raise ValueError( - "P3M cao has to be an integer between -1 and 7") - - if not (self._params["accuracy"] > 0): - raise ValueError("P3M accuracy has to be positive") - if self._params["epsilon"] == "metallic": self._params["epsilon"] = 0.0 @@ -131,7 +120,7 @@ IF DP3M == 1: def valid_keys(self): return ["prefactor", "alpha_L", "r_cut_iL", "mesh", "mesh_off", "cao", "accuracy", "epsilon", "cao_cut", "a", "ai", - "alpha", "r_cut", "cao3", "additional_mesh", "tune", "verbose"] + "alpha", "r_cut", "cao3", "tune", "verbose"] def required_keys(self): return ["accuracy", ] @@ -154,20 +143,32 @@ IF DP3M == 1: return params def _set_params_in_es_core(self): + if hasattr(self._params["mesh"], "__getitem__"): + mesh = self._params["mesh"][0] + else: + mesh = self._params["mesh"] + self.set_magnetostatics_prefactor() dp3m_set_eps(self._params["epsilon"]) - self.python_dp3m_set_mesh_offset(self._params["mesh_off"]) - self.python_dp3m_set_params( - self._params["r_cut"], self._params["mesh"], - self._params["cao"], self._params["alpha"], self._params["accuracy"]) + dp3m_set_mesh_offset(self._params["mesh_off"][0], + self._params["mesh_off"][1], + self._params["mesh_off"][2]) + dp3m_set_params(self._params["r_cut"], mesh, self._params["cao"], + self._params["alpha"], self._params["accuracy"]) def _tune(self): + if hasattr(self._params["mesh"], "__getitem__"): + mesh = 
self._params["mesh"][0] + else: + mesh = self._params["mesh"] + self.set_magnetostatics_prefactor() dp3m_set_eps(self._params["epsilon"]) - self.python_dp3m_set_tune_params( - self._params["r_cut"], self._params["mesh"], - self._params["cao"], -1., self._params["accuracy"]) - python_dp3m_adaptive_tune(self._params["verbose"]) + dp3m_set_tune_params(self._params["r_cut"], mesh, + self._params["cao"], self._params["accuracy"]) + tuning_error = dp3m_adaptive_tune(self._params["verbose"]) + if tuning_error: + handle_errors("DipolarP3M: tuning failed") self._params.update(self._get_params_from_es_core()) def _activate_method(self): @@ -181,48 +182,6 @@ IF DP3M == 1: dp3m_deactivate() super()._deactivate_method() - def python_dp3m_set_mesh_offset(self, mesh_off): - cdef double mesh_offset[3] - mesh_offset[0] = mesh_off[0] - mesh_offset[1] = mesh_off[1] - mesh_offset[2] = mesh_off[2] - return dp3m_set_mesh_offset( - mesh_offset[0], mesh_offset[1], mesh_offset[2]) - - def python_dp3m_set_params(self, p_r_cut, p_mesh, p_cao, p_alpha, - p_accuracy): - cdef int mesh - cdef double r_cut - cdef int cao - cdef double alpha - cdef double accuracy - r_cut = p_r_cut - cao = p_cao - alpha = p_alpha - accuracy = p_accuracy - if hasattr(p_mesh, "__getitem__"): - mesh = p_mesh[0] - else: - mesh = p_mesh - dp3m_set_params(r_cut, mesh, cao, alpha, accuracy) - - def python_dp3m_set_tune_params(self, p_r_cut, p_mesh, p_cao, p_alpha, - p_accuracy): - cdef int mesh - cdef double r_cut - cdef int cao - cdef double alpha - cdef double accuracy - r_cut = p_r_cut - cao = p_cao - alpha = p_alpha - accuracy = p_accuracy - if hasattr(p_mesh, "__getitem__"): - mesh = p_mesh[0] - else: - mesh = p_mesh - dp3m_set_tune_params(r_cut, mesh, cao, alpha, accuracy) - IF DIPOLES == 1: cdef class DipolarDirectSumCpu(MagnetostaticInteraction): """ diff --git a/src/python/espressomd/math.py b/src/python/espressomd/math.py new file mode 100644 index 00000000000..5b7ff893eb4 --- /dev/null +++ 
b/src/python/espressomd/math.py @@ -0,0 +1,36 @@ +# Copyright (C) 2010-2019 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from .script_interface import ScriptInterfaceHelper, script_interface_register + + +@script_interface_register +class CylindricalTransformationParameters(ScriptInterfaceHelper): + """ + Class to hold and validate the parameters needed for a cylindrical transformation. + The three parameters are available as attributes but are read-only. + + Parameters + ---------- + center : (3,) array_like of :obj:`float`, default = [0, 0, 0] + Position of the origin of the cylindrical coordinate system. + axis : (3,) array_like of :obj:`float`, default = [0, 0, 1] + Orientation vector of the ``z``-axis of the cylindrical coordinate system. + orientation : (3,) array_like of :obj:`float`, default = [1, 0, 0] + The axis on which ``phi = 0``.
+ """ + _so_name = "CylindricalTransformationParameters" diff --git a/src/python/espressomd/observables.py b/src/python/espressomd/observables.py index 2169bee15e9..430e68755c4 100644 --- a/src/python/espressomd/observables.py +++ b/src/python/espressomd/observables.py @@ -17,6 +17,7 @@ import itertools import numpy as np from .script_interface import ScriptInterfaceHelper, script_interface_register +from .math import CylindricalTransformationParameters @script_interface_register @@ -69,6 +70,18 @@ def bin_centers(self): return np.array(list(itertools.product(*edges))).reshape(shape) +class CylindricalProfileObservable(ProfileObservable): + """ + Base class for observables that work with cylinder coordinates + """ + + def __init__( + self, transform_params=CylindricalTransformationParameters(), **kwargs): + # Provide default transformation parameters if not user-provided + kwargs['transform_params'] = transform_params + super().__init__(**kwargs) + + @script_interface_register class ComPosition(Observable): @@ -636,7 +649,7 @@ class DPDStress(Observable): @script_interface_register -class CylindricalDensityProfile(ProfileObservable): +class CylindricalDensityProfile(CylindricalProfileObservable): """Calculates the particle density in cylindrical coordinates. @@ -644,26 +657,24 @@ class CylindricalDensityProfile(ProfileObservable): ---------- ids : array_like of :obj:`int` The ids of (existing) particles to take into account. - center : (3,) array_like of :obj:`float` - Position of the center of the cylindrical coordinate system for the histogram. - axis : (3,) array_like of :obj:`float` - Orientation vector of the ``z``-axis of the cylindrical coordinate system for the histogram. - n_r_bins : :obj:`int` + transform_params : :class:`espressomd.math.CylindricalTransformationParameters`, optional + Parameters of the cylinder transformation. 
Defaults to the default of :class:`espressomd.math.CylindricalTransformationParameters` + n_r_bins : :obj:`int`, default = 1 Number of bins in radial direction. - n_phi_bins : :obj:`int` + n_phi_bins : :obj:`int`, default = 1 Number of bins for the azimuthal direction. - n_z_bins : :obj:`int` + n_z_bins : :obj:`int`, default = 1 Number of bins in ``z`` direction. - min_r : :obj:`float` + min_r : :obj:`float`, default = 0 Minimum ``r`` to consider. - min_phi : :obj:`float` - Minimum ``phi`` to consider. + min_phi : :obj:`float`, default = -pi + Minimum ``phi`` to consider. Must be in [-pi,pi). min_z : :obj:`float` Minimum ``z`` to consider. max_r : :obj:`float` Maximum ``r`` to consider. - max_phi : :obj:`float` - Maximum ``phi`` to consider. + max_phi : :obj:`float`, default = pi + Maximum ``phi`` to consider. Must be in (-pi,pi]. max_z : :obj:`float` Maximum ``z`` to consider. @@ -676,7 +687,7 @@ class CylindricalDensityProfile(ProfileObservable): @script_interface_register -class CylindricalFluxDensityProfile(ProfileObservable): +class CylindricalFluxDensityProfile(CylindricalProfileObservable): """Calculates the particle flux density in cylindrical coordinates. @@ -684,26 +695,24 @@ class CylindricalFluxDensityProfile(ProfileObservable): ---------- ids : array_like of :obj:`int` The ids of (existing) particles to take into account. - center : (3,) array_like of :obj:`float` - Position of the center of the cylindrical coordinate system for the histogram. - axis : (3,) array_like of :obj:`float` - Orientation vector of the ``z``-axis of the cylindrical coordinate system for the histogram. - n_r_bins : :obj:`int` + transform_params : :class:`espressomd.math.CylindricalTransformationParameters`, optional + Parameters of the cylinder transformation. Defaults to the default of :class:`espressomd.math.CylindricalTransformationParameters` + n_r_bins : :obj:`int`, default = 1 Number of bins in radial direction. 
- n_phi_bins : :obj:`int` + n_phi_bins : :obj:`int`, default = 1 Number of bins for the azimuthal direction. - n_z_bins : :obj:`int` + n_z_bins : :obj:`int`, default = 1 Number of bins in ``z`` direction. - min_r : :obj:`float` + min_r : :obj:`float`, default = 0 Minimum ``r`` to consider. - min_phi : :obj:`float` - Minimum ``phi`` to consider. + min_phi : :obj:`float`, default = -pi + Minimum ``phi`` to consider. Must be in [-pi,pi). min_z : :obj:`float` Minimum ``z`` to consider. max_r : :obj:`float` Maximum ``r`` to consider. - max_phi : :obj:`float` - Maximum ``phi`` to consider. + max_phi : :obj:`float`, default = pi + Maximum ``phi`` to consider. Must be in (-pi,pi]. max_z : :obj:`float` Maximum ``z`` to consider. @@ -718,7 +727,8 @@ class CylindricalFluxDensityProfile(ProfileObservable): @script_interface_register -class CylindricalLBFluxDensityProfileAtParticlePositions(ProfileObservable): +class CylindricalLBFluxDensityProfileAtParticlePositions( + CylindricalProfileObservable): """Calculates the LB fluid flux density at the particle positions in cylindrical coordinates. @@ -727,26 +737,24 @@ class CylindricalLBFluxDensityProfileAtParticlePositions(ProfileObservable): ---------- ids : array_like of :obj:`int` The ids of (existing) particles to take into account. - center : (3,) array_like of :obj:`float` - Position of the center of the cylindrical coordinate system for the histogram. - axis : (3,) array_like of :obj:`float` - Orientation vector of the ``z``-axis of the cylindrical coordinate system for the histogram. - n_r_bins : :obj:`int` + transform_params : :class:`espressomd.math.CylindricalTransformationParameters`, optional + Parameters of the cylinder transformation. Defaults to the default of :class:`espressomd.math.CylindricalTransformationParameters` + n_r_bins : :obj:`int`, default = 1 Number of bins in radial direction. - n_phi_bins : :obj:`int` + n_phi_bins : :obj:`int`, default = 1 Number of bins for the azimuthal direction. 
- n_z_bins : :obj:`int` + n_z_bins : :obj:`int`, default = 1 Number of bins in ``z`` direction. - min_r : :obj:`float` + min_r : :obj:`float`, default = 0 Minimum ``r`` to consider. - min_phi : :obj:`float` - Minimum ``phi`` to consider. + min_phi : :obj:`float`, default = -pi + Minimum ``phi`` to consider. Must be in [-pi,pi). min_z : :obj:`float` Minimum ``z`` to consider. max_r : :obj:`float` Maximum ``r`` to consider. - max_phi : :obj:`float` - Maximum ``phi`` to consider. + max_phi : :obj:`float`, default = pi + Maximum ``phi`` to consider. Must be in (-pi,pi]. max_z : :obj:`float` Maximum ``z`` to consider. @@ -761,7 +769,8 @@ class CylindricalLBFluxDensityProfileAtParticlePositions(ProfileObservable): @script_interface_register -class CylindricalLBVelocityProfileAtParticlePositions(ProfileObservable): +class CylindricalLBVelocityProfileAtParticlePositions( + CylindricalProfileObservable): """Calculates the LB fluid velocity at the particle positions in cylindrical coordinates. @@ -770,26 +779,24 @@ class CylindricalLBVelocityProfileAtParticlePositions(ProfileObservable): ---------- ids : array_like of :obj:`int` The ids of (existing) particles to take into account. - center : (3,) array_like of :obj:`float` - Position of the center of the cylindrical coordinate system for the histogram. - axis : (3,) array_like of :obj:`float` - Orientation vector of the ``z``-axis of the cylindrical coordinate system for the histogram. - n_r_bins : :obj:`int` + transform_params : :class:`espressomd.math.CylindricalTransformationParameters`, optional + Parameters of the cylinder transformation. Defaults to the default of :class:`espressomd.math.CylindricalTransformationParameters` + n_r_bins : :obj:`int`, default = 1 Number of bins in radial direction. - n_phi_bins : :obj:`int` + n_phi_bins : :obj:`int`, default = 1 Number of bins for the azimuthal direction. - n_z_bins : :obj:`int` + n_z_bins : :obj:`int`, default = 1 Number of bins in ``z`` direction. 
- min_r : :obj:`float` + min_r : :obj:`float`, default = 0 Minimum ``r`` to consider. - min_phi : :obj:`float` - Minimum ``phi`` to consider. + min_phi : :obj:`float`, default = -pi + Minimum ``phi`` to consider. Must be in [-pi,pi). min_z : :obj:`float` Minimum ``z`` to consider. max_r : :obj:`float` Maximum ``r`` to consider. - max_phi : :obj:`float` - Maximum ``phi`` to consider. + max_phi : :obj:`float`, default = pi + Maximum ``phi`` to consider. Must be in (-pi,pi]. max_z : :obj:`float` Maximum ``z`` to consider. @@ -804,7 +811,7 @@ class CylindricalLBVelocityProfileAtParticlePositions(ProfileObservable): @script_interface_register -class CylindricalVelocityProfile(ProfileObservable): +class CylindricalVelocityProfile(CylindricalProfileObservable): """Calculates the particle velocity profile in cylindrical coordinates. @@ -812,26 +819,24 @@ class CylindricalVelocityProfile(ProfileObservable): ---------- ids : array_like of :obj:`int` The ids of (existing) particles to take into account. - center : (3,) array_like of :obj:`float` - Position of the center of the cylindrical coordinate system for the histogram. - axis : (3,) array_like of :obj:`float` - Orientation vector of the ``z``-axis of the cylindrical coordinate system for the histogram. - n_r_bins : :obj:`int` + transform_params : :class:`espressomd.math.CylindricalTransformationParameters`, optional + Parameters of the cylinder transformation. Defaults to the default of :class:`espressomd.math.CylindricalTransformationParameters` + n_r_bins : :obj:`int`, default = 1 Number of bins in radial direction. - n_phi_bins : :obj:`int` + n_phi_bins : :obj:`int`, default = 1 Number of bins for the azimuthal direction. - n_z_bins : :obj:`int` + n_z_bins : :obj:`int`, default = 1 Number of bins in ``z`` direction. - min_r : :obj:`float` + min_r : :obj:`float`, default = 0 Minimum ``r`` to consider. - min_phi : :obj:`float` - Minimum ``phi`` to consider. 
+ min_phi : :obj:`float`, default = -pi + Minimum ``phi`` to consider. Must be in [-pi,pi). min_z : :obj:`float` Minimum ``z`` to consider. max_r : :obj:`float` Maximum ``r`` to consider. - max_phi : :obj:`float` - Maximum ``phi`` to consider. + max_phi : :obj:`float`, default = pi + Maximum ``phi`` to consider. Must be in (-pi,pi]. max_z : :obj:`float` Maximum ``z`` to consider. @@ -846,7 +851,7 @@ class CylindricalVelocityProfile(ProfileObservable): @script_interface_register -class CylindricalLBVelocityProfile(ProfileObservable): +class CylindricalLBVelocityProfile(CylindricalProfileObservable): """Calculates the LB fluid velocity profile in cylindrical coordinates. @@ -856,26 +861,24 @@ class CylindricalLBVelocityProfile(ProfileObservable): Parameters ---------- - center : (3,) array_like of :obj:`float` - Position of the center of the cylindrical coordinate system for the histogram. - axis : (3,) array_like of :obj:`float` - Orientation vector of the ``z``-axis of the cylindrical coordinate system for the histogram. - n_r_bins : :obj:`int` + transform_params : :class:`espressomd.math.CylindricalTransformationParameters`, optional + Parameters of the cylinder transformation. Defaults to the default of :class:`espressomd.math.CylindricalTransformationParameters` + n_r_bins : :obj:`int`, default = 1 Number of bins in radial direction. - n_phi_bins : :obj:`int` + n_phi_bins : :obj:`int`, default = 1 Number of bins for the azimuthal direction. - n_z_bins : :obj:`int` + n_z_bins : :obj:`int`, default = 1 Number of bins in ``z`` direction. - min_r : :obj:`float` + min_r : :obj:`float`, default = 0 Minimum ``r`` to consider. - min_phi : :obj:`float` - Minimum ``phi`` to consider. + min_phi : :obj:`float`, default = -pi + Minimum ``phi`` to consider. Must be in [-pi,pi). min_z : :obj:`float` Minimum ``z`` to consider. max_r : :obj:`float` Maximum ``r`` to consider. - max_phi : :obj:`float` - Maximum ``phi`` to consider. 
+ max_phi : :obj:`float`, default = pi + Maximum ``phi`` to consider. Must be in (-pi,pi]. max_z : :obj:`float` Maximum ``z`` to consider. sampling_density : :obj:`float` @@ -920,7 +923,7 @@ class RDF(Observable): _so_name = "Observables::RDF" def __init__(self, **kwargs): - if "oid" not in kwargs and "ids2" not in kwargs: + if "ids2" not in kwargs: kwargs["ids2"] = [] super().__init__(**kwargs) diff --git a/src/python/espressomd/p3m_common.pxd b/src/python/espressomd/p3m_common.pxd index 3f7b685038c..aaf7f92b4d9 100644 --- a/src/python/espressomd/p3m_common.pxd +++ b/src/python/espressomd/p3m_common.pxd @@ -30,4 +30,3 @@ IF P3M == 1 or DP3M == 1: double a[3] double alpha double r_cut - double additional_mesh[3] diff --git a/src/python/espressomd/particle_data.pxd b/src/python/espressomd/particle_data.pxd index dc50d78194b..2b3d54e4c83 100644 --- a/src/python/espressomd/particle_data.pxd +++ b/src/python/espressomd/particle_data.pxd @@ -172,7 +172,6 @@ cdef extern from "particle_data.hpp": IF EXCLUSIONS: int change_exclusion(int part, int part2, int _delete) - void remove_all_exclusions() IF ENGINE: void set_particle_swimming(int part, particle_parameters_swimming swim) diff --git a/src/python/espressomd/particle_data.pyx b/src/python/espressomd/particle_data.pyx index 52a9e75ed62..f1b0612fcdb 100644 --- a/src/python/espressomd/particle_data.pyx +++ b/src/python/espressomd/particle_data.pyx @@ -1145,8 +1145,8 @@ cdef class ParticleHandle: constant terminal velocity in either of these methods is completely determined by the friction coefficient. You may only set one of the possibilities ``v_swim`` *or* ``f_swim`` as you cannot relax to constant force - *and* constant velocity at the same time. The setting both ``v_swim`` and - ``f_swim`` to 0.0 thus disables swimming. This option applies to all + *and* constant velocity at the same time. Setting both ``v_swim`` and + ``f_swim`` to 0.0 disables swimming. This option applies to all non-lattice-Boltzmann thermostats. 
Note that there is no real difference between ``v_swim`` and ``f_swim`` since the latter may always be chosen such that the same terminal velocity is achieved for a given friction coefficient. @@ -1161,7 +1161,7 @@ cdef class ParticleHandle: v_swim : :obj:`float` Achieve a constant velocity by imposing a constant terminal velocity ``v_swim``. This excludes the option ``f_swim``. - mode : :obj:`str`, \{'pusher', 'puller'\} + mode : :obj:`str`, \{'pusher', 'puller', 'N/A'\} The LB flow field can be generated by a pushing or a pulling mechanism, leading to change in the sign of the dipolar flow field with respect to the direction of motion. @@ -1182,10 +1182,10 @@ cdef class ParticleHandle: >>> system = espressomd.System() >>> >>> # Usage with Langevin - >>> system.part.add(id=0, pos=[1,0,0],swimming={'f_swim':0.03}) + >>> system.part.add(id=0, pos=[1, 0, 0], swimming={'f_swim': 0.03}) >>> >>> # Usage with LB - >>> system.part.add(id=1, pos=[2,0,0], swimming={'f_swim': 0.01, + >>> system.part.add(id=1, pos=[2, 0, 0], swimming={'f_swim': 0.01, ... 
'mode': 'pusher', 'dipole_length': 2.0}) """ @@ -1228,7 +1228,7 @@ cdef class ParticleHandle: swim.push_pull = 0 else: raise Exception( - "'mode' has to be either 'pusher' or 'puller'.") + "'mode' has to be either 'pusher', 'puller' or 'N/A'.") if 'dipole_length' in _params: check_type_or_throw_except( @@ -1578,7 +1578,7 @@ cdef class _ParticleSliceImpl: id_list = id_list[slice_] # Generate a mask which will remove ids of non-existing particles - mask = np.empty(len(id_list), dtype=np.bool) + mask = np.empty(len(id_list), dtype=type(True)) mask[:] = True for i, id in enumerate(id_list): if not particle_exists(id): @@ -1922,7 +1922,7 @@ Set quat and scalar dipole moment (dipm) instead.") if is_valid_type(idx, int): return particle_exists(idx) if isinstance(idx, (slice, tuple, list, np.ndarray)): - tf_array = np.zeros(len(idx), dtype=np.bool) + tf_array = np.zeros(len(idx), dtype=type(True)) for i in range(len(idx)): tf_array[i] = particle_exists(idx[i]) return tf_array diff --git a/src/python/espressomd/scafacos.pyx b/src/python/espressomd/scafacos.pyx index 62726cf3fa2..93cf4263b7f 100644 --- a/src/python/espressomd/scafacos.pyx +++ b/src/python/espressomd/scafacos.pyx @@ -21,8 +21,7 @@ from .actors cimport Actor from libcpp.string cimport string # import std::string from . cimport electrostatics from . cimport magnetostatics -from .utils import to_char_pointer, to_str -from .utils cimport handle_errors +from .utils import to_char_pointer, to_str, handle_errors include "myconfig.pxi" diff --git a/src/python/espressomd/script_interface.pyx b/src/python/espressomd/script_interface.pyx index a2e6dfcfe23..5af72c5859a 100644 --- a/src/python/espressomd/script_interface.pyx +++ b/src/python/espressomd/script_interface.pyx @@ -15,8 +15,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import numpy as np -from .utils import to_char_pointer, to_str -from .utils cimport Vector3d, make_array_locked, handle_errors +from .utils import to_char_pointer, to_str, handle_errors +from .utils cimport Vector3d, make_array_locked from libcpp.memory cimport make_shared @@ -44,13 +44,12 @@ cdef class PScriptInterface: Parameters ---------- + sip : :class:`PObjectRef` + Object id of an existing core object (method 1). name : :obj:`str` - Name of the core class to instantiate (method 1). + Name of the core class to instantiate (method 2). \*\*kwargs - Parameters for the core class constructor (method 1). - sip : :class:`PObjectRef` - Object id of an existing core object (method 2). - + Parameters for the core class constructor (method 2). policy : :obj:`str`, \{'GLOBAL', 'LOCAL'\} Creation policy. @@ -59,8 +58,6 @@ cdef class PScriptInterface: sip: :class:`PObjectRef` Pointer to a ScriptInterface object in the core. - policy_: :obj:`str` - Creation policy. """ @@ -184,6 +181,7 @@ cdef Variant python_object_to_variant(value): """Convert Python objects to C++ Variant objects.""" cdef vector[Variant] vec + cdef unordered_map[int, Variant] vmap cdef PObjectRef oref if value is None: @@ -195,6 +193,13 @@ cdef Variant python_object_to_variant(value): if isinstance(value, PScriptInterface): oref = value.get_sip() return make_variant(oref.sip) + elif isinstance(value, dict): + for k, v in value.items(): + if not isinstance(k, int): + raise TypeError( + f"No conversion from type dict_item([({type(k).__name__}, {type(v).__name__})]) to Variant[std::unordered_map]") + vmap[k] = python_object_to_variant(v) + return make_variant[unordered_map[int, Variant]](vmap) elif hasattr(value, '__iter__') and not(type(value) == str): for e in value: vec.push_back(python_object_to_variant(e)) @@ -208,12 +213,14 @@ cdef Variant python_object_to_variant(value): elif np.issubdtype(np.dtype(type(value)), np.floating): return make_variant[double](value) else: - raise TypeError("Unknown type 
for conversion to Variant") + raise TypeError( + f"No conversion from type {type(value).__name__} to Variant") cdef variant_to_python_object(const Variant & value) except +: """Convert C++ Variant objects to Python objects.""" cdef vector[Variant] vec + cdef unordered_map[int, Variant] vmap cdef shared_ptr[ObjectHandle] ptr if is_none(value): return None @@ -263,6 +270,14 @@ cdef variant_to_python_object(const Variant & value) except +: res.append(variant_to_python_object(i)) return res + if is_type[unordered_map[int, Variant]](value): + vmap = get_value[unordered_map[int, Variant]](value) + res = {} + + for kv in vmap: + res[kv.first] = variant_to_python_object(kv.second) + + return res raise TypeError("Unknown type") diff --git a/src/python/espressomd/system.pyx b/src/python/espressomd/system.pyx index 96b0802f305..692722f1838 100644 --- a/src/python/espressomd/system.pyx +++ b/src/python/espressomd/system.pyx @@ -43,11 +43,11 @@ if LB_BOUNDARIES or LB_BOUNDARIES_GPU: from .ekboundaries import EKBoundaries from .comfixed import ComFixed from .globals import Globals -from .globals cimport FIELD_SIMTIME, FIELD_MAX_OIF_OBJECTS -from .globals cimport integ_switch, max_oif_objects, sim_time +from .globals cimport FIELD_MAX_OIF_OBJECTS +from .globals cimport integ_switch, max_oif_objects from .globals cimport maximal_cutoff_bonded, maximal_cutoff_nonbonded, mpi_bcast_parameter -from .utils cimport handle_errors, check_type_or_throw_except -from .utils import is_valid_type +from .utils cimport check_type_or_throw_except +from .utils import is_valid_type, handle_errors IF VIRTUAL_SITES: from .virtual_sites import ActiveVirtualSitesHandle, VirtualSitesOff @@ -240,13 +240,10 @@ cdef class System: def __set__(self, double _time): if _time < 0: raise ValueError("Simulation time must be >= 0") - global sim_time - sim_time = _time - mpi_bcast_parameter(FIELD_SIMTIME) + self.globals.time = _time def __get__(self): - global sim_time - return sim_time + return self.globals.time 
property time_step: """ diff --git a/src/python/espressomd/thermostat.pyx b/src/python/espressomd/thermostat.pyx index 5db6938114b..3c7f4678e16 100644 --- a/src/python/espressomd/thermostat.pyx +++ b/src/python/espressomd/thermostat.pyx @@ -632,7 +632,7 @@ cdef class Thermostat: lb_lbcoupling_set_rng_state(0) global thermo_switch - thermo_switch = (thermo_switch or THERMO_LB) + thermo_switch = (thermo_switch | THERMO_LB) mpi_bcast_parameter(FIELD_THERMO_SWITCH) global thermo_virtual diff --git a/src/python/espressomd/utils.pxd b/src/python/espressomd/utils.pxd index c4ddde6bca9..a0b5ab06911 100644 --- a/src/python/espressomd/utils.pxd +++ b/src/python/espressomd/utils.pxd @@ -102,6 +102,7 @@ cdef extern from "utils/quaternion.hpp" namespace "Utils": T & operator[](int i) cdef make_array_locked(Vector3d) +cdef make_array_locked_vector(vector[Vector3d] v) cdef Vector3d make_Vector3d(a) cdef extern from "utils/Factory.hpp" namespace "Utils": diff --git a/src/python/espressomd/utils.pyx b/src/python/espressomd/utils.pyx index 5246b29b1ee..83bc3c704eb 100644 --- a/src/python/espressomd/utils.pyx +++ b/src/python/espressomd/utils.pyx @@ -218,6 +218,13 @@ Use numpy.copy() to get a writable copy." 
cdef make_array_locked(Vector3d v): return array_locked([v[0], v[1], v[2]]) +cdef make_array_locked_vector(vector[Vector3d] v): + ret = np.empty((v.size(), 3)) + for i in range(v.size()): + for j in range(3): + ret[i][j] = v[i][j] + return array_locked(ret) + cdef Vector3d make_Vector3d(a): cdef Vector3d v @@ -274,7 +281,7 @@ def is_valid_type(value, t): if value is None: return False if t == int: - return isinstance(value, (int, np.integer, np.long)) + return isinstance(value, (int, np.integer)) elif t == float: if hasattr(np, 'float128'): return isinstance( diff --git a/src/python/espressomd/visualization_opengl.py b/src/python/espressomd/visualization_opengl.py index 7a1ec1658ef..412cd37b50f 100644 --- a/src/python/espressomd/visualization_opengl.py +++ b/src/python/espressomd/visualization_opengl.py @@ -1931,6 +1931,7 @@ def __init__(self, shape, particle_type, color, material, self.axis = np.array(self.shape.get_parameter('axis')) self.length = self.shape.get_parameter('length') self.radius = self.shape.get_parameter('radius') + self.open = self.shape.get_parameter('open') self.cap_center_1 = self.center - self.axis / \ np.linalg.norm(self.axis) * 0.5 * self.length self.cap_center_2 = self.center + self.axis / \ @@ -1939,7 +1940,7 @@ def __init__(self, shape, particle_type, color, material, def draw(self): draw_cylinder(self.cap_center_1, self.cap_center_2, self.radius, self.color, self.material, - self.quality, draw_caps=True) + self.quality, draw_caps=not self.open) class Ellipsoid(Shape): diff --git a/src/python/object_in_fluid/oif_classes.py b/src/python/object_in_fluid/oif_classes.py index 883639e7087..c6c1d8e54dc 100644 --- a/src/python/object_in_fluid/oif_classes.py +++ b/src/python/object_in_fluid/oif_classes.py @@ -1092,7 +1092,7 @@ def set_mesh_points(self, file_name=None): i = 0 for line in nodes_coord: # extracts coordinates from the string line line = line.split() - new_position = np.array(line).astype(np.float) + center + new_position = 
np.array(line).astype(float) + center self.mesh.points[i].set_pos(new_position) i += 1 @@ -1376,7 +1376,6 @@ def elastic_forces( self.append_point_data_to_vtk( file_name=vtk_file, data_name="total_f_metric", data=elastic_forces_norms_list, first_append=first) - first = False # output raw data if raw_data_file is not None: diff --git a/src/python/pypresso.cmakein b/src/python/pypresso.cmakein index 3ff41cce483..4b1ef554801 100755 --- a/src/python/pypresso.cmakein +++ b/src/python/pypresso.cmakein @@ -16,6 +16,7 @@ export PYTHONPATH if [ "@CMAKE_CXX_COMPILER_ID@" != "GNU" ] && [ "@WITH_ASAN@" = "ON" ]; then asan_lib=$("@CMAKE_CXX_COMPILER@" /dev/null -### -o /dev/null -fsanitize=address 2>&1 | grep -o '[" ][^" ]*libclang_rt.asan[^" ]*[^s][" ]' | sed 's/[" ]//g' | sed 's/\.a$/.so/g') + export DYLD_INSERT_LIBRARIES="$asan_lib" for lib in $asan_lib; do test -f $lib && LD_PRELOAD="$lib $LD_PRELOAD" done diff --git a/src/script_interface/CylindricalTransformationParameters.hpp b/src/script_interface/CylindricalTransformationParameters.hpp new file mode 100644 index 00000000000..89ed4b840d0 --- /dev/null +++ b/src/script_interface/CylindricalTransformationParameters.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2010-2019 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef SCRIPT_INTERFACE_CYL_TRANSFORM_PARAMS_HPP +#define SCRIPT_INTERFACE_CYL_TRANSFORM_PARAMS_HPP + +#include "script_interface/ScriptInterface.hpp" + +#include "utils/math/cylindrical_transformation_parameters.hpp" + +namespace ScriptInterface { + +class CylindricalTransformationParameters + : public AutoParameters { +public: + CylindricalTransformationParameters() { + add_parameters({{"center", AutoParameter::read_only, + [this]() { return m_transform_params->center(); }}, + {"axis", AutoParameter::read_only, + [this]() { return m_transform_params->axis(); }}, + {"orientation", AutoParameter::read_only, + [this]() { return m_transform_params->orientation(); }}}); + } + std::shared_ptr<::Utils::CylindricalTransformationParameters> + cyl_transform_params() { + return m_transform_params; + } + void do_construct(VariantMap const &params) override { + m_transform_params = + std::make_shared( + get_value_or(params, "center", + Utils::Vector3d{{0, 0, 0}}), + get_value_or(params, "axis", + Utils::Vector3d{{0, 0, 1}}), + get_value_or(params, "orientation", + Utils::Vector3d{{1, 0, 0}})); + } + +private: + std::shared_ptr + m_transform_params; +}; +} // namespace ScriptInterface +#endif diff --git a/src/script_interface/Variant.hpp b/src/script_interface/Variant.hpp index 07168303b7d..a1c34d3e3e4 100644 --- a/src/script_interface/Variant.hpp +++ b/src/script_interface/Variant.hpp @@ -25,9 +25,21 @@ #include +/* This include guards against + * an issue in boost::serialization from boost 1.74.0 that leads to compiler + * error "'library_version_type' is not a member of 'boost::serialization'" + * when including .
More details + * in ticket https://github.com/boostorg/serialization/issues/219 + */ +#include +#if BOOST_VERSION / 100000 == 1 && BOOST_VERSION / 100 % 1000 == 74 +#include +#endif + #include #include #include +#include #include #include @@ -48,11 +60,18 @@ constexpr const None none{}; /** * @brief Possible types for parameters. + * + * The visitors and packing functions need to be adapted accordingly when + * extending this variant with new types. For the exact details, see commit + * b48ab62. + * The number of types is limited by macro @c BOOST_MPL_LIMIT_LIST_SIZE + * (defaults to 20). */ using Variant = boost::make_recursive_variant< None, bool, int, size_t, double, std::string, std::vector, std::vector, ObjectRef, std::vector, - Utils::Vector2d, Utils::Vector3d, Utils::Vector4d>::type; + Utils::Vector2d, Utils::Vector3d, Utils::Vector4d, + std::unordered_map>::type; using VariantMap = std::unordered_map; diff --git a/src/script_interface/constraints/couplings.hpp b/src/script_interface/constraints/couplings.hpp index 640a5e934dc..31eeac0bcc3 100644 --- a/src/script_interface/constraints/couplings.hpp +++ b/src/script_interface/constraints/couplings.hpp @@ -19,6 +19,15 @@ #ifndef SCRIPT_INTERFACE_CONSTRAINTS_DETAIL_COUPLINGS_HPP #define SCRIPT_INTERFACE_CONSTRAINTS_DETAIL_COUPLINGS_HPP +/** + * @file + * @brief ScriptInterface implementations for the + * various couplings provided. + * + * These are separated from the Constraints because + * they can be reused together with the couplings themselves. + */ + #include "core/field_coupling/couplings/Charge.hpp" #include "core/field_coupling/couplings/Direct.hpp" #include "core/field_coupling/couplings/Mass.hpp" @@ -27,22 +36,13 @@ #include "script_interface/ScriptInterface.hpp" -#include -#include +#include namespace ScriptInterface { namespace Constraints { namespace detail { using namespace ::FieldCoupling::Coupling; -/** - * @brief ScriptInterface implementations for the - * various couplings provided. 
- * - * These are separated from the Constraints because - * they can be reused together with the couplings themselves. - */ - /** * Default version for parameterless couplings. */ @@ -59,7 +59,7 @@ template <> struct coupling_parameters_impl { static std::vector params(const This &this_) { return {{ "gamma", - [this_](const Variant &v) { this_().gamma() = get_value(v); }, + AutoParameter::read_only, [this_]() { return this_().gamma(); }, }}; } @@ -70,18 +70,11 @@ template <> struct coupling_parameters_impl { static std::vector params(const This &this_) { return {{ "default_scale", - [this_](const Variant &v) { - this_().default_scale() = get_value(v); - }, + AutoParameter::read_only, [this_]() { return this_().default_scale(); }, }, - {"particle_scales", - [this_](const Variant &v) { - this_().particle_scales() = - Utils::unpack>( - boost::get(v)); - }, - [this_]() { return Utils::pack(this_().particle_scales()); }}}; + {"particle_scales", AutoParameter::read_only, + [this_]() { return make_map(this_().particle_scales()); }}}; } }; @@ -96,12 +89,10 @@ template <> inline Viscous make_coupling(const VariantMap &params) { } template <> inline Scaled make_coupling(const VariantMap &params) { - auto scales = params.count("particle_scale") - ?
Utils::unpack>( - get_value(params, "particle_scale")) - : std::unordered_map{}; - - return Scaled{scales, get_value(params, "default_scale")}; + auto const particle_scales = get_value_or>( + params, "particle_scales", {}); + return Scaled{get_map(particle_scales), + get_value(params, "default_scale")}; } } // namespace detail } // namespace Constraints diff --git a/src/script_interface/get_value.hpp b/src/script_interface/get_value.hpp index 955bf06ef3f..d0da51aeb4d 100644 --- a/src/script_interface/get_value.hpp +++ b/src/script_interface/get_value.hpp @@ -93,7 +93,7 @@ struct vector_conversion_visitor : boost::static_visitor> { return v; } - /* We try do unpack variant vectors and check if they + /* We try to unpack variant vectors and check if they * are convertible element by element. */ auto operator()(std::vector const &vv) const { if (N != vv.size()) { @@ -160,6 +160,26 @@ template <> struct get_value_helper, void> { } }; +template +struct GetMapOrEmpty : boost::static_visitor> { + /* Catch all case -> wrong type. */ + template std::unordered_map operator()(U const &) const { + throw boost::bad_get{}; + } + + /* Standard case, correct type */ + std::unordered_map operator()(std::unordered_map const &v) const { + return v; + } +}; + +/* std::unordered_map cases */ +template <> struct get_value_helper, void> { + std::unordered_map operator()(Variant const &v) const { + return boost::apply_visitor(GetMapOrEmpty{}, v); + } +}; + /* This allows direct retrieval of a shared_ptr to the object from an ObjectId variant. If the type is a derived type, the type is also checked. 
@@ -213,6 +233,33 @@ template T get_value(Variant const &v) { } } +template +std::unordered_map get_map(std::unordered_map const &v) { + std::unordered_map ret; + auto it = v.begin(); + try { + for (; it != v.end(); ++it) { + ret.insert({it->first, detail::get_value_helper{}(it->second)}); + } + } catch (const boost::bad_get &) { + throw Exception("Provided map value of type " + + detail::type_label(it->second) + " is not convertible to " + + Utils::demangle() + + " (raised during the creation of a " + + Utils::demangle>() + ")"); + } + return ret; +} + +template +std::unordered_map make_map(std::unordered_map const &v) { + std::unordered_map ret; + for (auto const &it : v) { + ret.insert({it.first, Variant(it.second)}); + } + return ret; +} + /** * @brief Get a value from a VariantMap by name, or throw * if it does not exist or is not convertible to diff --git a/src/script_interface/initialize.cpp b/src/script_interface/initialize.cpp index 88a7048bc4b..f971b29ae12 100644 --- a/src/script_interface/initialize.cpp +++ b/src/script_interface/initialize.cpp @@ -29,6 +29,7 @@ #include "h5md/initialize.hpp" #endif #include "ComFixed.hpp" +#include "CylindricalTransformationParameters.hpp" #include "accumulators/initialize.hpp" #include "collision_detection/initialize.hpp" #include "lbboundaries/initialize.hpp" @@ -53,6 +54,8 @@ void initialize(Utils::Factory *f) { CollisionDetection::initialize(f); f->register_new("ComFixed"); + f->register_new( + "CylindricalTransformationParameters"); } } /* namespace ScriptInterface */ diff --git a/src/script_interface/observables/CylindricalLBProfileObservable.hpp b/src/script_interface/observables/CylindricalLBProfileObservable.hpp index f0f12e3e639..b0eafa3942a 100644 --- a/src/script_interface/observables/CylindricalLBProfileObservable.hpp +++ b/src/script_interface/observables/CylindricalLBProfileObservable.hpp @@ -28,6 +28,8 @@ #include "core/observables/CylindricalLBProfileObservable.hpp" #include
"script_interface/get_value.hpp" +#include "script_interface/CylindricalTransformationParameters.hpp" + #include #include @@ -53,18 +55,7 @@ class CylindricalLBProfileObservable using Base::Base; CylindricalLBProfileObservable() { this->add_parameters({ - {"center", - [this](const Variant &v) { - cylindrical_profile_observable()->center = - get_value<::Utils::Vector3d>(v); - }, - [this]() { return cylindrical_profile_observable()->center; }}, - {"axis", - [this](const Variant &v) { - cylindrical_profile_observable()->axis = - get_value(v); - }, - [this]() { return cylindrical_profile_observable()->axis; }}, + {"transform_params", m_transform_params}, {"n_r_bins", [this](const Variant &v) { cylindrical_profile_observable()->n_bins[0] = @@ -149,13 +140,21 @@ class CylindricalLBProfileObservable } void do_construct(VariantMap const &params) override { - m_observable = - make_shared_from_args( - params, "center", "axis", "n_r_bins", "n_phi_bins", "n_z_bins", - "min_r", "max_r", "min_phi", "max_phi", "min_z", "max_z", - "sampling_density"); + set_from_args(m_transform_params, params, "transform_params"); + + if (m_transform_params) + m_observable = std::make_shared( + m_transform_params->cyl_transform_params(), + get_value_or(params, "n_r_bins", 1), + get_value_or(params, "n_phi_bins", 1), + get_value_or(params, "n_z_bins", 1), + get_value_or(params, "min_r", 0.), + get_value(params, "max_r"), + get_value_or(params, "min_phi", -Utils::pi()), + get_value_or(params, "max_phi", Utils::pi()), + get_value(params, "min_z"), + get_value(params, "max_z"), + get_value(params, "sampling_density")); } Variant do_call_method(std::string const &method, @@ -180,6 +179,7 @@ class CylindricalLBProfileObservable private: std::shared_ptr m_observable; + std::shared_ptr m_transform_params; }; } /* namespace Observables */ diff --git a/src/script_interface/observables/CylindricalPidProfileObservable.hpp b/src/script_interface/observables/CylindricalPidProfileObservable.hpp index
9d22325af17..d541880b367 100644 --- a/src/script_interface/observables/CylindricalPidProfileObservable.hpp +++ b/src/script_interface/observables/CylindricalPidProfileObservable.hpp @@ -27,11 +27,14 @@ #include "Observable.hpp" #include "core/observables/CylindricalPidProfileObservable.hpp" -#include +#include +#include +#include #include #include #include + #include #include @@ -58,18 +61,7 @@ class CylindricalPidProfileObservable get_value>(v); }, [this]() { return cylindrical_pid_profile_observable()->ids(); }}, - {"center", - [this](const Variant &v) { - cylindrical_pid_profile_observable()->center = - get_value<::Utils::Vector3d>(v); - }, - [this]() { return cylindrical_pid_profile_observable()->center; }}, - {"axis", - [this](const Variant &v) { - cylindrical_pid_profile_observable()->axis = - get_value(v); - }, - [this]() { return cylindrical_pid_profile_observable()->axis; }}, + {"transform_params", m_transform_params}, {"n_r_bins", [this](const Variant &v) { cylindrical_pid_profile_observable()->n_bins[0] = @@ -149,13 +141,21 @@ class CylindricalPidProfileObservable }; void do_construct(VariantMap const &params) override { - m_observable = - make_shared_from_args, Utils::Vector3d, - Utils::Vector3d, int, int, int, double, double, - double, double, double, double>( - params, "ids", "center", "axis", "n_r_bins", "n_phi_bins", - "n_z_bins", "min_r", "max_r", "min_phi", "max_phi", "min_z", - "max_z"); + set_from_args(m_transform_params, params, "transform_params"); + + if (m_transform_params) + m_observable = std::make_shared( + get_value>(params, "ids"), + m_transform_params->cyl_transform_params(), + get_value_or(params, "n_r_bins", 1), + get_value_or(params, "n_phi_bins", 1), + get_value_or(params, "n_z_bins", 1), + get_value_or(params, "min_r", 0.), + get_value(params, "max_r"), + get_value_or(params, "min_phi", -Utils::pi()), + get_value_or(params, "max_phi", Utils::pi()), + get_value(params, "min_z"), + get_value(params, "max_z")); } Variant
do_call_method(std::string const &method, @@ -180,6 +180,7 @@ class CylindricalPidProfileObservable private: std::shared_ptr m_observable; + std::shared_ptr m_transform_params; }; } /* namespace Observables */ diff --git a/src/script_interface/packed_variant.hpp b/src/script_interface/packed_variant.hpp index 677e098c4c3..f07ee280a69 100644 --- a/src/script_interface/packed_variant.hpp +++ b/src/script_interface/packed_variant.hpp @@ -54,7 +54,8 @@ inline ObjectId object_id(const ObjectHandle *p) { using PackedVariant = boost::make_recursive_variant< None, bool, int, double, std::string, std::vector, std::vector, ObjectId, std::vector, Utils::Vector2d, - Utils::Vector3d, Utils::Vector4d>::type; + Utils::Vector3d, Utils::Vector4d, + std::unordered_map>::type; using PackedMap = std::vector>; @@ -84,6 +85,17 @@ struct PackVisitor : boost::static_visitor { return ret; } + /* For the map, we recurse into each element. */ + auto operator()(const std::unordered_map &map) const { + std::unordered_map ret{}; + + for (auto const &it : map) { + ret.insert({it.first, boost::apply_visitor(*this, it.second)}); + } + + return ret; + } + /* For object references we store the object reference, and * replace it by just an id. */ PackedVariant operator()(const ObjectRef &so_ptr) const { @@ -121,6 +133,17 @@ struct UnpackVisitor : boost::static_visitor { return ret; } + /* For the map, we recurse into each element. */ + auto operator()(const std::unordered_map &map) const { + std::unordered_map ret{}; + + for (auto const &it : map) { + ret.insert({it.first, boost::apply_visitor(*this, it.second)}); + } + + return ret; + } + /* Regular value are just verbatim copied into the result. 
*/ template Variant operator()(T &&val) const { return std::forward(val); diff --git a/src/script_interface/shapes/Union.hpp b/src/script_interface/shapes/Union.hpp index eaa1aff3949..8d689379849 100644 --- a/src/script_interface/shapes/Union.hpp +++ b/src/script_interface/shapes/Union.hpp @@ -57,8 +57,8 @@ class Union : public Shape { } else if (name == "clear") { for (auto &s : m_shapes) { m_core_shape->remove(s->shape()); - m_shapes.clear(); } + m_shapes.clear(); } else if (name == "size") { return static_cast(m_shapes.size()); } else if (name == "empty") { diff --git a/src/shapes/include/shapes/HollowConicalFrustum.hpp b/src/shapes/include/shapes/HollowConicalFrustum.hpp index ac846e0e545..1deb0d98f2e 100644 --- a/src/shapes/include/shapes/HollowConicalFrustum.hpp +++ b/src/shapes/include/shapes/HollowConicalFrustum.hpp @@ -22,6 +22,9 @@ #include "Shape.hpp" #include +#include + +#include namespace Shapes { @@ -48,15 +51,20 @@ class HollowConicalFrustum : public Shape { HollowConicalFrustum() : m_r1(0.0), m_r2(0.0), m_length(0.0), m_thickness(0.0), m_direction(1), m_center{Utils::Vector3d{}}, m_axis{Utils::Vector3d{ - 0, 0, 1}} {} - - void set_r1(double radius) { m_r1 = radius; } - void set_r2(double radius) { m_r2 = radius; } - void set_length(double length) { m_length = length; } - void set_thickness(double thickness) { m_thickness = thickness; } - void set_direction(int dir) { m_direction = dir; } - void set_axis(Utils::Vector3d const &axis) { m_axis = axis; } + 0., 0., 1.}}, + m_orientation{Utils::Vector3d{1., 0., 0.}} {} + void set_r1(double const radius) { m_r1 = radius; } + void set_r2(double const radius) { m_r2 = radius; } + void set_length(double const length) { m_length = length; } + void set_thickness(double const thickness) { m_thickness = thickness; } + void set_direction(int const dir) { m_direction = dir; } + void set_axis(Utils::Vector3d const &axis) { + m_axis = axis; + // Even though the HCF is cylinder-symmetric, it needs a well defined 
phi=0 + // orientation for the coordinate transformation. + m_orientation = Utils::calc_orthonormal_vector(axis); + } void set_center(Utils::Vector3d const &center) { m_center = center; } /// Get radius 1 perpendicular to axis. @@ -92,6 +100,7 @@ class HollowConicalFrustum : public Shape { int m_direction; Utils::Vector3d m_center; Utils::Vector3d m_axis; + Utils::Vector3d m_orientation; }; } // namespace Shapes diff --git a/src/shapes/src/HollowConicalFrustum.cpp b/src/shapes/src/HollowConicalFrustum.cpp index 3db5fb59fc0..ea384252cc4 100644 --- a/src/shapes/src/HollowConicalFrustum.cpp +++ b/src/shapes/src/HollowConicalFrustum.cpp @@ -33,8 +33,9 @@ void HollowConicalFrustum::calculate_dist(const Utils::Vector3d &pos, Utils::Vector3d &vec) const { // transform given position to cylindrical coordinates in the reference frame // of the cone - auto const pos_cyl = - Utils::transform_coordinate_cartesian_to_cylinder(pos - m_center, m_axis); + auto const v = pos - m_center; + auto const pos_cyl = Utils::transform_coordinate_cartesian_to_cylinder( + v, m_axis, m_orientation); // clang-format off /* * the following implementation is based on: @@ -61,7 +62,7 @@ void HollowConicalFrustum::calculate_dist(const Utils::Vector3d &pos, // Transform back to cartesian coordinates.
auto const pos_intersection = Utils::transform_coordinate_cylinder_to_cartesian( - {r_intersection, pos_cyl[1], z_intersection}, m_axis) + + {r_intersection, pos_cyl[1], z_intersection}, m_axis, m_orientation) + m_center; auto const u = (pos - pos_intersection).normalize(); diff --git a/src/shapes/unit_tests/CMakeLists.txt b/src/shapes/unit_tests/CMakeLists.txt index 5a92272c020..abe3d2805fd 100644 --- a/src/shapes/unit_tests/CMakeLists.txt +++ b/src/shapes/unit_tests/CMakeLists.txt @@ -6,3 +6,5 @@ unit_test(NAME Union_test SRC Union_test.cpp DEPENDS EspressoShapes EspressoUtils) unit_test(NAME Ellipsoid_test SRC Ellipsoid_test.cpp DEPENDS EspressoShapes EspressoUtils) +unit_test(NAME NoWhere_test SRC NoWhere_test.cpp DEPENDS EspressoShapes + EspressoUtils) diff --git a/src/shapes/unit_tests/NoWhere_test.cpp b/src/shapes/unit_tests/NoWhere_test.cpp new file mode 100644 index 00000000000..057b0a1f2b6 --- /dev/null +++ b/src/shapes/unit_tests/NoWhere_test.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2010-2021 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#define BOOST_TEST_MODULE NoWhere test +#define BOOST_TEST_DYN_LINK +#include + +#include +#include + +#include + +#include + +bool dist_is_always_inf(const Shapes::Shape &s) { + constexpr auto infinity = std::numeric_limits::infinity(); + + Utils::Vector3d const positions[2] = { + {0.0, 1.0, 2.0}, + {-10.0, 0.1, 5.0}, + }; + + for (auto const &pos : positions) { + Utils::Vector3d dist{}; + double d; + + s.calculate_dist(pos, d, dist); + if (d != infinity) { + return false; + } + + for (auto xyz : dist) { + if (xyz != infinity) { + return false; + } + } + } + + return true; +} + +BOOST_AUTO_TEST_CASE(dist_function) { + Shapes::NoWhere nw; + + BOOST_CHECK(dist_is_always_inf(nw)); +} diff --git a/src/utils/include/utils/math/coordinate_transformation.hpp b/src/utils/include/utils/math/coordinate_transformation.hpp index db31193b062..07f84265f8b 100644 --- a/src/utils/include/utils/math/coordinate_transformation.hpp +++ b/src/utils/include/utils/math/coordinate_transformation.hpp @@ -19,64 +19,141 @@ #ifndef UTILS_COORDINATE_TRANSFORMATION_HPP #define UTILS_COORDINATE_TRANSFORMATION_HPP +/** + * @file + * Convert coordinates from the Cartesian system to the cylindrical system. + * The transformation functions are provided with three overloads: + * - one function for the trivial Cartesian <-> cylindrical transformation + * - one function to transform from/to a cylindrical system with custom axis + * (extra @p axis argument, keep in mind the angle phi is under-defined) + * - one function to transform from/to an oriented cylindrical system with + * custom axis (extra @p orientation argument, the angle phi is well-defined) + */ + #include "utils/Vector.hpp" #include "utils/constants.hpp" #include "utils/math/vec_rotate.hpp" +#include "utils/matrix.hpp" #include "utils/quaternion.hpp" +#include +#include + namespace Utils { -/** \brief Transform the given 3D position to cylinder coordinates with - * longitudinal axis aligned with axis parameter. 
+/** + * @brief Basis change. */ -inline Vector3d -transform_coordinate_cartesian_to_cylinder(const Vector3d &pos, - const Vector3d &axis) { - static auto const z_axis = Vector3d{{0, 0, 1}}; - double theta; - Vector3d rotation_axis; - auto r = [](auto const &pos) { - return std::sqrt(pos[0] * pos[0] + pos[1] * pos[1]); - }; - auto phi = [](auto const &pos) { return std::atan2(pos[1], pos[0]); }; - if (axis != z_axis) { - std::tie(theta, rotation_axis) = rotation_params(axis, z_axis); - auto const rotated_pos = vec_rotate(rotation_axis, theta, pos); - return {r(rotated_pos), phi(rotated_pos), rotated_pos[2]}; +inline Vector3d basis_change(Vector3d const &b1, Vector3d const &b2, + Vector3d const &b3, Vector3d const &v, + bool reverse = false) { + auto const e_x = b1.normalized(); + auto const e_y = b2.normalized(); + auto const e_z = b3.normalized(); + auto const M = Matrix{ + {e_x[0], e_x[1], e_x[2]}, + {e_y[0], e_y[1], e_y[2]}, + {e_z[0], e_z[1], + e_z[2]}}.transposed(); + if (reverse) { + return M * v; } - return {r(pos), phi(pos), pos[2]}; + return M.inversed() * v; } /** - * @brief Coordinate transformation from cylinder to cartesian coordinates. + * @brief Coordinate transformation from Cartesian to cylindrical coordinates. + * The origins and z-axis of the coordinate systems co-incide. + * The @f$ \phi = 0 @f$ direction corresponds to the x-axis in the + * original coordinate system. 
+ * @param pos %Vector to transform */ inline Vector3d -transform_coordinate_cylinder_to_cartesian(Vector3d const &pos, - Vector3d const &axis) { - Vector3d const transformed{ - {pos[0] * std::cos(pos[1]), pos[0] * std::sin(pos[1]), pos[2]}}; - static auto const z_axis = Vector3d{{0, 0, 1}}; - if (axis == z_axis) - return transformed; - double theta; - Vector3d rotation_axis; - std::tie(theta, rotation_axis) = rotation_params(z_axis, axis); - auto const rotated_pos = vec_rotate(rotation_axis, theta, transformed); - return rotated_pos; +transform_coordinate_cartesian_to_cylinder(Vector3d const &pos) { + auto const r = std::sqrt(pos[0] * pos[0] + pos[1] * pos[1]); + auto const phi = std::atan2(pos[1], pos[0]); + return {r, phi, pos[2]}; +} + +/** + * @brief Coordinate transformation from Cartesian to cylindrical coordinates + * with change of basis. The origins of the coordinate systems co-incide. + * + * If the parameter @p axis is not equal to [0, 0, 1], the value + * of the angle @f$ \phi @f$ in cylindrical coordinates is under-defined. + * To fully define it, it is necessary to provide an orientation vector + * in Cartesian coordinates that will be used as the reference point + * (i.e. such that @f$ \phi = 0 @f$), by default it is the x-axis. 
+ * + * @param pos %Vector to transform + * @param axis Longitudinal axis of the cylindrical coordinates + * @param orientation Reference point (in untransformed coordinates) for + * which @f$ \phi = 0 @f$ + */ +inline Vector3d transform_coordinate_cartesian_to_cylinder( + Vector3d const &pos, Vector3d const &axis, Vector3d const &orientation) { + // check that axis and orientation are orthogonal + assert(std::abs(axis * orientation) < + 5 * std::numeric_limits::epsilon()); + auto const rotation_axis = vector_product(axis, orientation); + auto const pos_t = basis_change(orientation, rotation_axis, axis, pos); + return transform_coordinate_cartesian_to_cylinder(pos_t); } -/** \brief Transform the given 3D vector to cylinder coordinates with - * symmetry axis aligned with axis parameter. +/** + * @brief Coordinate transformation from cylindrical to Cartesian coordinates. + * The origins and z-axis of the coordinate systems co-incide. + * The @f$ \phi = 0 @f$ direction corresponds to the x-axis in the + * transformed coordinate system. + * @param pos %Vector to transform + */ +inline Vector3d +transform_coordinate_cylinder_to_cartesian(Vector3d const &pos) { + auto const &rho = pos[0]; + auto const &phi = pos[1]; + auto const &z = pos[2]; + return {rho * std::cos(phi), rho * std::sin(phi), z}; +} + +/** + * @brief Coordinate transformation from cylindrical to Cartesian coordinates + * with change of basis. The origins of the coordinate systems co-incide. + * + * If the parameter @p axis is not equal to [0, 0, 1], the value + * of the angle @f$ \phi @f$ in cylindrical coordinates is under-defined. + * To fully define it, it is necessary to provide an orientation vector + * in Cartesian coordinates that will be used as the reference point + * (i.e. such that @f$ \phi = 0 @f$). 
+ * + * @param pos %Vector to transform + * @param axis Longitudinal axis of the cylindrical coordinates + * @param orientation Reference point (in Cartesian coordinates) for + * which @f$ \phi = 0 @f$ + */ +inline Vector3d transform_coordinate_cylinder_to_cartesian( + Vector3d const &pos, Vector3d const &axis, Vector3d const &orientation) { + // check that axis and orientation are orthogonal + assert(std::abs(axis * orientation) < + 5 * std::numeric_limits::epsilon()); + auto const rotation_axis = vector_product(axis, orientation); + auto const pos_t = transform_coordinate_cylinder_to_cartesian(pos); + return basis_change(orientation, rotation_axis, axis, pos_t, true); +} + +/** + * @brief Vector transformation from Cartesian to cylindrical coordinates. + * @param vec %Vector to transform + * @param axis Longitudinal axis of the cylindrical coordinates + * @param pos Origin of the vector */ inline Vector3d transform_vector_cartesian_to_cylinder(Vector3d const &vec, Vector3d const &axis, Vector3d const &pos) { static auto const z_axis = Vector3d{{0, 0, 1}}; - double theta; - Vector3d rotation_axis; - std::tie(theta, rotation_axis) = rotation_params(axis, z_axis); - auto const rotated_pos = vec_rotate(rotation_axis, theta, pos); - auto const rotated_vec = vec_rotate(rotation_axis, theta, vec); + auto const angle = angle_between(axis, z_axis); + auto const rotation_axis = Utils::vector_product(axis, z_axis).normalize(); + auto const rotated_pos = vec_rotate(rotation_axis, angle, pos); + auto const rotated_vec = vec_rotate(rotation_axis, angle, vec); auto const r = std::sqrt(rotated_pos[0] * rotated_pos[0] + rotated_pos[1] * rotated_pos[1]); // v_r = (x * v_x + y * v_y) / sqrt(x^2 + y^2) diff --git a/src/utils/include/utils/math/cylindrical_transformation_parameters.hpp b/src/utils/include/utils/math/cylindrical_transformation_parameters.hpp new file mode 100644 index 00000000000..20f18d78a77 --- /dev/null +++ 
b/src/utils/include/utils/math/cylindrical_transformation_parameters.hpp @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010-2019 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef ESPRESSO_CYLINDER_TRANSFORMATION_PARAMETERS_HPP +#define ESPRESSO_CYLINDER_TRANSFORMATION_PARAMETERS_HPP + +#include +#include + +#include + +namespace Utils { + +/** + * @brief A class to hold and validate parameters for a cylindrical coordinate + * transformations. + * + * @param center The origin of the cylindrical coordinates. + * @param axis The "z"-axis. Must be normalized. + * @param orientation The axis along which phi = 0. Must be normalized and + * orthogonal to axis. 
+ */ +class CylindricalTransformationParameters { +public: + CylindricalTransformationParameters() = default; + CylindricalTransformationParameters(Utils::Vector3d const ¢er, + Utils::Vector3d const &axis, + Utils::Vector3d const &orientation) + : m_center(center), m_axis(axis), m_orientation(orientation) { + validate(); + } + + Utils::Vector3d center() const { return m_center; } + Utils::Vector3d axis() const { return m_axis; } + Utils::Vector3d orientation() const { return m_orientation; } + +private: + void validate() const { + auto constexpr eps = 10 * std::numeric_limits::epsilon(); + if (Utils::abs(m_orientation * m_axis) > eps) { + throw std::runtime_error( + "CylindricalTransformationParameters: Axis and orientation must be " + "orthogonal. Scalar product is " + + std::to_string(m_orientation * m_axis)); + } + if (Utils::abs(m_axis.norm() - 1) > eps) { + throw std::runtime_error("CylindricalTransformationParameters: Axis must " + "be normalized. Norm is " + + std::to_string(m_axis.norm())); + } + if (Utils::abs(m_orientation.norm() - 1) > eps) { + throw std::runtime_error("CylindricalTransformationParameters: " + "orientation must be normalized. Norm is " + + std::to_string(m_orientation.norm())); + } + } + + const Utils::Vector3d m_center{}; + const Utils::Vector3d m_axis{0, 0, 1}; + const Utils::Vector3d m_orientation{1, 0, 0}; +}; + +} // namespace Utils + +#endif // ESPRESSO_CYLINDER_TRANSFORMATION_PARAMETERS_HPP diff --git a/src/utils/include/utils/math/orthonormal_vec.hpp b/src/utils/include/utils/math/orthonormal_vec.hpp new file mode 100644 index 00000000000..57f2637fab4 --- /dev/null +++ b/src/utils/include/utils/math/orthonormal_vec.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2010-2019 The ESPResSo project + * + * This file is part of ESPResSo. 
+ * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef ESPRESSO_ORTHONORMAL_VEC_HPP +#define ESPRESSO_ORTHONORMAL_VEC_HPP + +#include "utils/Vector.hpp" +#include "utils/constants.hpp" + +namespace Utils { +/** + * @brief Return a vector that is orthonormal to vec + */ +template +Vector calc_orthonormal_vector(Vector const &vec) { + /* Calculate orthonormal vector using Gram-Schmidt orthogonalization of a + trial vector. Only works if the trial vector is not parallel, so we have to + try a second one in that case + */ + Vector, 2> try_vectors = {Vector::broadcast(0), + Vector::broadcast(0)}; + try_vectors[0][0] = 1; + try_vectors[1][1] = 1; + + Vector ret; + for (auto v : try_vectors) { + auto orth_component = v - (v * vec) / vec.norm2() * vec; + auto norm = orth_component.norm(); + if (norm >= 1. / Utils::sqrt_2()) { + ret = orth_component / norm; + break; + } + } + return ret; +} + +} // namespace Utils + +#endif // ESPRESSO_ORTHONORMAL_VEC_HPP \ No newline at end of file diff --git a/src/utils/include/utils/math/vec_rotate.hpp b/src/utils/include/utils/math/vec_rotate.hpp index 32c1cc23d8d..029852ddd56 100644 --- a/src/utils/include/utils/math/vec_rotate.hpp +++ b/src/utils/include/utils/math/vec_rotate.hpp @@ -47,21 +47,10 @@ inline Vector3d vec_rotate(const Vector3d &axis, double angle, } /** - * @brief Determine rotation angle and axis for rotating vec onto target_vec. 
- * @param vec Vector to be rotated - * @param target_vec Target vector - * @return rotation angle and rotation axis + * @brief Determine the angle between two vectors. */ -inline std::tuple -rotation_params(Vector3d const &vec, Vector3d const &target_vec) { - if (vec.normalized() != target_vec.normalized()) { - auto const theta = - std::acos(vec * target_vec / (vec.norm() * target_vec.norm())); - auto const rotation_axis = - Utils::vector_product(vec, target_vec).normalize(); - return std::make_tuple(theta, rotation_axis); - } - return std::make_tuple(0.0, Vector3d{}); +inline double angle_between(Vector3d const &v1, Vector3d const &v2) { + return std::acos(v1 * v2 / std::sqrt(v1.norm2() * v2.norm2())); } } // namespace Utils diff --git a/src/utils/include/utils/memory.hpp b/src/utils/include/utils/memory.hpp deleted file mode 100644 index 200c456a18b..00000000000 --- a/src/utils/include/utils/memory.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010-2019 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef CORE_UTILS_MEMORY_HPP -#define CORE_UTILS_MEMORY_HPP - -#include -#include -#include -#include - -namespace Utils { - -/*************************************************************/ -/** \name Dynamic memory allocation. 
*/ -/*************************************************************/ -/**@{*/ - -/* to enable us to make sure that freed pointers are invalidated, we normally - try to use realloc. - Unfortunately allocating zero bytes (which should be avoided) actually - allocates 16 bytes, and - reallocating to 0 also. To avoid this, we use our own malloc and realloc - procedures. */ - -/** used instead of realloc. - Makes sure that resizing to zero FREEs pointer */ -template inline T *realloc(T *old, size_t size) { - if (size == 0) { - ::free(static_cast(old)); - return nullptr; - } - - auto *p = static_cast(::realloc(static_cast(old), size)); - - if (p == nullptr) { - throw std::bad_alloc{}; - } - return p; -} - -/** used instead of malloc. - Makes sure that a zero size allocation returns a nullptr pointer */ -inline void *malloc(size_t size) { - if (size == 0) { - return nullptr; - } - - void *p = ::malloc(size); - - if (p == nullptr) { - throw std::bad_alloc{}; - } - return p; -} - -/**@}*/ -} // namespace Utils - -#endif diff --git a/src/utils/include/utils/mpi/all_gatherv.hpp b/src/utils/include/utils/mpi/all_gatherv.hpp deleted file mode 100644 index 2d746fc779d..00000000000 --- a/src/utils/include/utils/mpi/all_gatherv.hpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2010-2019 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -#ifndef UTILS_MPI_ALL_GATHERV_HPP -#define UTILS_MPI_ALL_GATHERV_HPP - -#include -#include -#include -#include -#include - -namespace Utils { -namespace Mpi { - -namespace detail { -template -void all_gatherv_impl(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes, - const int *displs, boost::mpl::true_) { - MPI_Datatype type = boost::mpi::get_mpi_datatype(); - - /* in-place ? */ - if (in_values == out_values) { - BOOST_MPI_CHECK_RESULT(MPI_Allgatherv, - (MPI_IN_PLACE, 0, type, out_values, - const_cast(sizes), const_cast(displs), - type, comm)); - } else { - BOOST_MPI_CHECK_RESULT(MPI_Allgatherv, - (const_cast(in_values), in_size, type, - out_values, const_cast(sizes), - const_cast(displs), type, comm)); - } -} - -template -void all_gatherv_impl(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes, - const int *displs, boost::mpl::false_) { - auto const n_nodes = comm.size(); - auto const rank = comm.rank(); - - /* not in-place */ - if (in_values != out_values) { - std::copy_n(in_values, in_size, out_values + displs[rank]); - } - - std::vector req; - for (int i = 0; i < n_nodes; i++) { - if (i != rank) { - req.emplace_back(comm.isend(i, 42, out_values + displs[rank], in_size)); - req.emplace_back(comm.irecv(i, 42, out_values + displs[i], sizes[i])); - } - } - - boost::mpi::wait_all(req.begin(), req.end()); -} -} // namespace detail - -template -void all_gatherv(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes, - const int *displs) { - detail::all_gatherv_impl(comm, in_values, in_size, out_values, sizes, displs, - boost::mpi::is_mpi_datatype()); -} - -template -void all_gatherv(const boost::mpi::communicator &comm, const T *in_values, - int in_size, T *out_values, const int *sizes) { - std::vector displ(comm.size()); - - int offset = 0; - for (unsigned i = 0; i < displ.size(); i++) { - displ[i] = 
offset; - offset += sizes[i]; - } - - detail::all_gatherv_impl(comm, in_values, in_size, out_values, sizes, - displ.data(), boost::mpi::is_mpi_datatype()); -} -} // namespace Mpi -} // namespace Utils -#endif diff --git a/src/utils/include/utils/mpi/gather_buffer.hpp b/src/utils/include/utils/mpi/gather_buffer.hpp index ab827769dc2..61e501856f7 100644 --- a/src/utils/include/utils/mpi/gather_buffer.hpp +++ b/src/utils/include/utils/mpi/gather_buffer.hpp @@ -33,65 +33,6 @@ namespace Utils { namespace Mpi { -namespace detail { -template -void relocate_data(T *buffer, std::vector const &sizes, - std::vector const &displ, int root) { - if (sizes[root] && displ[root]) { - for (int i = sizes[root] - 1; i >= 0; --i) { - buffer[i + displ[root]] = buffer[i]; - } - } -} -} // namespace detail - -/** - * @brief Gather buffer with different size on each node. - * - * Gathers buffers with different lengths from all nodes to root. - * The buffer is assumed to be large enough to hold the data from - * all the nodes and is owned by the caller. On the @p root node, - * the first @p n_elem elements of @p buffer are moved, if need - * be. On the other nodes, @p buffer is not touched. - * - * This encapsulates a common combination of MPI_Gather() - * and MPI_{Send,Recv}(). - * - * @param buffer On the master the target buffer that has to be - * large enough to hold all elements and has the local - * part in the beginning. On the slaves the local buffer. - * @param n_elem The number of elements in the local buffer. - * @param comm The MPI communicator. - * @param root The rank where the data should be gathered. - * @return On rank root, the total number of elements in the buffer, - * on the other ranks 0. 
- */ -template -int gather_buffer(T *buffer, int n_elem, boost::mpi::communicator comm, - int root = 0) { - if (comm.rank() == root) { - static std::vector sizes; - static std::vector displ; - - auto const total_size = - detail::size_and_offset(sizes, displ, n_elem, comm, root); - - /* Move the original data to its new location */ - detail::relocate_data(buffer, sizes, displ, root); - - /* Gather data */ - gatherv(comm, buffer, 0, buffer, sizes.data(), displ.data(), root); - - return total_size; - } - /* Send local size */ - detail::size_and_offset(n_elem, comm, root); - /* Send data */ - gatherv(comm, buffer, n_elem, static_cast(nullptr), nullptr, nullptr, - root); - - return 0; -} /** * @brief Gather buffer with different size on each node. @@ -125,7 +66,11 @@ void gather_buffer(std::vector &buffer, buffer.resize(tot_size); /* Move the original data to its new location */ - detail::relocate_data(buffer.data(), sizes, displ, root); + if (sizes[root] && displ[root]) { + for (int i = sizes[root] - 1; i >= 0; --i) { + buffer[i + displ[root]] = buffer[i]; + } + } /* Gather data */ gatherv(comm, buffer.data(), buffer.size(), buffer.data(), sizes.data(), diff --git a/src/utils/tests/CMakeLists.txt b/src/utils/tests/CMakeLists.txt index 3b7c549d0f3..a230d4b84ba 100644 --- a/src/utils/tests/CMakeLists.txt +++ b/src/utils/tests/CMakeLists.txt @@ -74,9 +74,9 @@ unit_test(NAME all_compare_test SRC all_compare_test.cpp DEPENDS EspressoUtils Boost::mpi MPI::MPI_CXX NUM_PROC 3) unit_test(NAME gatherv_test SRC gatherv_test.cpp DEPENDS EspressoUtils Boost::mpi MPI::MPI_CXX NUM_PROC 3) -unit_test(NAME all_gatherv_test SRC all_gatherv_test.cpp DEPENDS EspressoUtils - Boost::mpi MPI::MPI_CXX) unit_test(NAME sendrecv_test SRC sendrecv_test.cpp DEPENDS EspressoUtils Boost::mpi MPI::MPI_CXX EspressoUtils NUM_PROC 3) unit_test(NAME matrix_test SRC matrix_test.cpp DEPENDS EspressoUtils Boost::serialization NUM_PROC 1) +unit_test(NAME orthonormal_vec_test SRC orthonormal_vec_test.cpp 
DEPENDS + EspressoUtils Boost::serialization NUM_PROC 1) diff --git a/src/utils/tests/Factory_test.cpp b/src/utils/tests/Factory_test.cpp index e14355ecd5b..ad461867dae 100644 --- a/src/utils/tests/Factory_test.cpp +++ b/src/utils/tests/Factory_test.cpp @@ -45,7 +45,7 @@ struct OtherDerivedTestClass : public TestClass { }; /* Check registration of construction functions */ -BOOST_AUTO_TEST_CASE(regiser_class) { +BOOST_AUTO_TEST_CASE(register_class) { Utils::Factory factory; factory.register_new("other_derived_class"); @@ -66,7 +66,7 @@ BOOST_AUTO_TEST_CASE(make) { BOOST_CHECK(dynamic_cast(o.get()) != nullptr); } -BOOST_AUTO_TEST_CASE(stable_name_) { +BOOST_AUTO_TEST_CASE(type_name) { const std::string derived_class_name = "derived_test_class"; Utils::Factory factory; diff --git a/src/utils/tests/all_gatherv_test.cpp b/src/utils/tests/all_gatherv_test.cpp deleted file mode 100644 index 211f8b76c08..00000000000 --- a/src/utils/tests/all_gatherv_test.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2017-2019 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#define BOOST_TEST_NO_MAIN -#define BOOST_TEST_MODULE all_gather test -#define BOOST_TEST_DYN_LINK -#include -#include - -#include "utils/mpi/all_gatherv.hpp" -using Utils::Mpi::all_gatherv; - -#include -#include - -namespace mpi = boost::mpi; - -BOOST_AUTO_TEST_CASE(mpi_type) { - mpi::communicator world; - auto const rank = world.rank(); - auto const size = world.size(); - - /* out-of-place */ - { - std::vector out(size, -1); - std::vector sizes(size, 1); - - all_gatherv(world, &rank, 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(i, out.at(i)); - } - } - - /* in-place */ - { - std::vector out(size, -1); - out[rank] = rank; - std::vector sizes(size, 1); - - all_gatherv(world, out.data(), 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(i, out.at(i)); - } - } -} - -BOOST_AUTO_TEST_CASE(non_mpi_type) { - mpi::communicator world; - auto const rank = world.rank(); - auto const size = world.size(); - auto const in = std::to_string(rank); - - /* out-of-place */ - { - std::vector out(size); - std::vector sizes(size, 1); - - all_gatherv(world, &in, 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(std::to_string(i), out.at(i)); - } - } - - /* in-place */ - { - std::vector out(size); - out[rank] = in; - std::vector sizes(size, 1); - - all_gatherv(world, out.data(), 1, out.data(), sizes.data()); - - for (int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(std::to_string(i), out.at(i)); - } - } -} - -int main(int argc, char **argv) { - mpi::environment mpi_env(argc, argv); - - return boost::unit_test::unit_test_main(init_unit_test, argc, argv); -} diff --git a/src/utils/tests/coordinate_transformation.cpp b/src/utils/tests/coordinate_transformation.cpp index b5c5d19e2ee..31371967657 100644 --- a/src/utils/tests/coordinate_transformation.cpp +++ b/src/utils/tests/coordinate_transformation.cpp @@ -25,81 +25,215 @@ #include #include +#include using 
Utils::Vector3d; BOOST_AUTO_TEST_CASE(cartesian_to_cylinder_test) { - Vector3d const cart_coord{{1.0, 3.3, 2.0}}; - auto const transformed_x = transform_coordinate_cartesian_to_cylinder( - cart_coord, Vector3d{{1, 0, 0}}); - auto const transformed_y = transform_coordinate_cartesian_to_cylinder( - cart_coord, Vector3d{{0, 1, 0}}); - auto const transformed_z = transform_coordinate_cartesian_to_cylinder( - cart_coord, Vector3d{{0, 0, 1}}); - // For x as the symmetry axis we rotate the cartesian coordinates around the - // y-axis by -pi/2. - auto const expected_x = transform_coordinate_cartesian_to_cylinder( - vec_rotate(Vector3d{{0.0, 1.0, 0.0}}, -Utils::pi() / 2.0, cart_coord), - Vector3d{{0, 0, 1}}); - // For y as the symmetry axis we rotate the cartesian coordinates around the - // x-axis by pi/2. - auto const expected_y = transform_coordinate_cartesian_to_cylinder( - vec_rotate(Vector3d{{1.0, 0.0, 0.0}}, Utils::pi() / 2.0, cart_coord), - Vector3d{{0, 0, 1}}); - auto const expected_z = Vector3d{ - {std::sqrt(cart_coord[0] * cart_coord[0] + cart_coord[1] * cart_coord[1]), - std::atan2(cart_coord[1], cart_coord[0]), cart_coord[2]}}; + constexpr auto eps = 1e-14; + auto const pos = Vector3d{{1.0, 3.3, 2.0}}; + auto const cyl = transform_coordinate_cartesian_to_cylinder(pos); + BOOST_CHECK_SMALL(cyl[0] - std::sqrt(pos[0] * pos[0] + pos[1] * pos[1]), eps); + BOOST_CHECK_SMALL(cyl[1] - std::atan2(pos[1], pos[0]), eps); + BOOST_CHECK_SMALL(cyl[2] - pos[2], eps); +} + +BOOST_AUTO_TEST_CASE(basis_transform_test) { + constexpr auto eps = 1e-14; + Vector3d const b_x{{1, 0, 0}}; + Vector3d const b_y{{0, 1, 0}}; + Vector3d const b_z{{0, 0, 1}}; + // identity transform + Vector3d const v{{1, 2, 3}}; + Vector3d const v_identity_transform = Utils::basis_change(b_x, b_y, b_z, v); + // identity transform (swap both the vector and coordinate system) + Vector3d const v_swap_coord_transform = + Utils::basis_change(b_z, b_y, b_x, {{v[2], v[1], v[0]}}); + // non-trivial transform + 
Vector3d const v1 = Vector3d{{2, 2, 2}}.normalized(); + Vector3d const v2 = Vector3d{{3, 3, -6}}.normalized(); + Vector3d const v3 = Utils::vector_product(v1, v2).normalized(); + Vector3d const v4 = basis_change(v1, v2, v3, 0.1 * v1 + 0.2 * v2 - 0.3 * v3); + Vector3d const v4_expected = Vector3d{{0.1, 0.2, -0.3}}; + for (int i = 0; i < 3; ++i) { + BOOST_CHECK_SMALL(v_identity_transform[i] - v[i], eps); + BOOST_CHECK_SMALL(v_swap_coord_transform[i] - v[i], eps); + BOOST_CHECK_SMALL(v4[i] - v4_expected[i], eps); + } +} +BOOST_AUTO_TEST_CASE( + transform_coordinate_cartesian_to_cylinder_base_change_test) { + constexpr auto eps = 1e-14; + Vector3d const v1{{1, 3, 4}}; + Vector3d const v2{{-3.0, 7, 2}}; + Vector3d const axis = Utils::vector_product(v1, v2).normalized(); + Vector3d const v3 = + Utils::transform_coordinate_cartesian_to_cylinder(v1, axis, v1); + Vector3d const v3_ref{{v1.norm(), 0, 0}}; + auto const angle_v1_v2 = Utils::angle_between(v1, v2); + auto const v4 = Utils::transform_coordinate_cartesian_to_cylinder( + v2 + 2 * axis, axis, v1); + Vector3d v4_ref{{v2.norm(), angle_v1_v2, 2}}; + auto const v5 = Utils::transform_coordinate_cartesian_to_cylinder( + v1 + 2 * axis, axis, v2); + Vector3d v5_ref{{v1.norm(), -angle_v1_v2, 2}}; for (int i = 0; i < 3; ++i) { - BOOST_CHECK(transformed_x[i] == expected_x[i]); - BOOST_CHECK(transformed_y[i] == expected_y[i]); - BOOST_CHECK(transformed_z[i] == expected_z[i]); + BOOST_CHECK_SMALL(v3[i] - v3_ref[i], eps); + BOOST_CHECK_SMALL(v4[i] - v4_ref[i], eps); + BOOST_CHECK_SMALL(v5[i] - v5_ref[i], eps); + } +} + +BOOST_AUTO_TEST_CASE(cartesian_to_cylinder_with_axis_and_orientation_test) { + constexpr auto eps = 1e-14; + // tilted orthogonal basis + auto const y = (Vector3d{{0, 1, -1}}).normalize(); + auto const z = (Vector3d{{1, 1, 1}}).normalize(); + auto const x = Utils::vector_product(y, z); + + // check transformation with orientation (phi is random for r=0) + { + auto const x_cyl = 
transform_coordinate_cartesian_to_cylinder(x, z, y); + auto const y_cyl = transform_coordinate_cartesian_to_cylinder(y, z, y); + auto const z_cyl = transform_coordinate_cartesian_to_cylinder(z, z, y); + auto const x_ref = Vector3d{{1.0, -Utils::pi() / 2.0, 0.0}}; + auto const y_ref = Vector3d{{1.0, 0.0, 0.0}}; + auto const z_ref = Vector3d{{0.0, z_cyl[1], 1.0}}; + for (int i = 0; i < 3; ++i) { + BOOST_CHECK_SMALL(x_cyl[i] - x_ref[i], eps); + BOOST_CHECK_SMALL(y_cyl[i] - y_ref[i], eps); + BOOST_CHECK_SMALL(z_cyl[i] - z_ref[i], eps); + } + } + // check transformation with orientation for another angle + { + auto const u = vec_rotate(z, Utils::pi() / 3.0, x); + auto const v = vec_rotate(z, Utils::pi() / 3.0, y); + auto const u_cyl = transform_coordinate_cartesian_to_cylinder(u, z, y); + auto const v_cyl = transform_coordinate_cartesian_to_cylinder(v, z, y); + auto const u_ref = Vector3d{{1.0, Utils::pi() * (1. / 3. - 1. / 2.), 0.0}}; + auto const v_ref = Vector3d{{1.0, Utils::pi() / 3.0, 0.0}}; + for (int i = 0; i < 3; ++i) { + BOOST_CHECK_SMALL(u_cyl[i] - u_ref[i], eps); + BOOST_CHECK_SMALL(v_cyl[i] - v_ref[i], eps); + } + } + // check transformation of random vectors + { + std::subtract_with_carry_engine rng(2); + auto const r_uniform = [&rng]() { + return static_cast(rng() - rng.min()) / (rng.max() - rng.min()); + }; + for (int trial = 0; trial < 100; ++trial) { + Vector3d const v1{r_uniform(), r_uniform(), r_uniform()}; + Vector3d const v2{r_uniform(), r_uniform(), r_uniform()}; + auto const a = Utils::vector_product(v1, v2) / v1.norm() / v2.norm(); + auto const v1_v1 = transform_coordinate_cartesian_to_cylinder(v1, a, v1); + auto const v2_v1 = transform_coordinate_cartesian_to_cylinder(v2, a, v1); + auto const v1_v2 = transform_coordinate_cartesian_to_cylinder(v1, a, v2); + Vector3d const v1_v1_ref{v1.norm(), 0.0, 0.0}; + Vector3d const v2_v1_ref{v2.norm(), Utils::angle_between(v1, v2), 0.0}; + Vector3d const v1_v2_ref{v1.norm(), -Utils::angle_between(v1, v2), 
0.0}; + for (int i = 0; i < 3; ++i) { + BOOST_CHECK_SMALL(v1_v1[i] - v1_v1_ref[i], eps); + BOOST_CHECK_SMALL(v2_v1[i] - v2_v1_ref[i], eps); + BOOST_CHECK_SMALL(v1_v2[i] - v1_v2_ref[i], eps); + } + } } } BOOST_AUTO_TEST_CASE(cylinder_to_cartesian_test) { + constexpr auto eps = 1e-14; + auto const cyl = Vector3d{{1.0, Utils::pi() / 4, 2.0}}; + auto const pos = transform_coordinate_cylinder_to_cartesian(cyl); + BOOST_CHECK_SMALL(pos[0] - std::sqrt(2) / 2, eps); + BOOST_CHECK_SMALL(pos[1] - std::sqrt(2) / 2, eps); + BOOST_CHECK_SMALL(pos[2] - cyl[2], eps); +} + +BOOST_AUTO_TEST_CASE(cylinder_to_cartesian_with_axis_and_orientation_test) { + constexpr auto eps = 2e-14; Vector3d const cylinder_coord{{1.2, 3.123, 42.0}}; - auto const transformed_x = transform_coordinate_cylinder_to_cartesian( - cylinder_coord, Vector3d{{1, 0, 0}}); - auto const transformed_y = transform_coordinate_cylinder_to_cartesian( - cylinder_coord, Vector3d{{0, 1, 0}}); - auto const transformed_z = transform_coordinate_cylinder_to_cartesian( - cylinder_coord, Vector3d{{0, 0, 1}}); + auto const e_x = Vector3d{{1., 0., 0.}}; + auto const e_y = Vector3d{{0., 1., 0.}}; + auto const e_z = Vector3d{{0., 0., 1.}}; + + auto const transformed_x = + transform_coordinate_cylinder_to_cartesian(cylinder_coord, e_x, -e_z); + auto const transformed_y = + transform_coordinate_cylinder_to_cartesian(cylinder_coord, e_y, e_x); + auto const transformed_z = + transform_coordinate_cylinder_to_cartesian(cylinder_coord, e_z, e_x); // We transform from cylinder zu cartesian and have to rotate back. See test // cartesian_to_cylinder_test. 
- auto const expected_x = - vec_rotate(Vector3d{{0.0, 1.0, 0.0}}, Utils::pi() / 2.0, - transform_coordinate_cylinder_to_cartesian( - cylinder_coord, Vector3d{{0, 0, 1}})); - auto const expected_y = - vec_rotate(Vector3d{{1.0, 0.0, 0.0}}, -Utils::pi() / 2.0, - transform_coordinate_cylinder_to_cartesian( - cylinder_coord, Vector3d{{0, 0, 1}})); + auto const expected_x = vec_rotate( + e_y, Utils::pi() / 2.0, + transform_coordinate_cylinder_to_cartesian(cylinder_coord, e_z, e_x)); + auto const expected_y = vec_rotate( + e_x, -Utils::pi() / 2.0, + transform_coordinate_cylinder_to_cartesian(cylinder_coord, e_z, e_x)); // x = r * cos(phi); y = r * sin(phi); z = z auto const expected_z = Vector3d{ {cylinder_coord[0] * std::cos(cylinder_coord[1]), cylinder_coord[0] * std::sin(cylinder_coord[1]), cylinder_coord[2]}}; for (int i = 0; i < 3; ++i) { - BOOST_CHECK(transformed_x[i] == expected_x[i]); - BOOST_CHECK(transformed_y[i] == expected_y[i]); - BOOST_CHECK(transformed_z[i] == expected_z[i]); + BOOST_CHECK_SMALL(transformed_x[i] - expected_x[i], eps); + BOOST_CHECK_SMALL(transformed_y[i] - expected_y[i], eps); + BOOST_CHECK_SMALL(transformed_z[i] - expected_z[i], eps); } } -BOOST_AUTO_TEST_CASE(vector_cart_to_cyl_test) { - constexpr auto eps = 1e-13; - Vector3d const pos{{1.1, 2.2, 3.3}}; - auto const axis = (Vector3d{{4.4, 5.5, 6.6}}).normalized(); - Vector3d const vec{{7.7, 8.8, 9.9}}; +BOOST_AUTO_TEST_CASE(cylinder_to_cartesian_with_axis_with_phi_2_test) { + constexpr auto eps = 1e-14; + // tilted orthogonal basis + auto const y = (Vector3d{{0, 1, -1}}).normalize(); + auto const z = (Vector3d{{1, 1, 1}}).normalize(); + auto const x = Utils::vector_product(y, z); - auto const vec_cyl = transform_vector_cartesian_to_cylinder(vec, axis, pos); - - // cylindrical basis vectors at pos - auto const e_z = axis; - auto const e_r = (pos - (pos * axis) * axis).normalized(); - auto const e_phi = Utils::vector_product(e_z, e_r); - - BOOST_CHECK_SMALL(vec_cyl[0] - vec * e_r, eps); - 
BOOST_CHECK_SMALL(vec_cyl[1] - vec * e_phi, eps); - BOOST_CHECK_SMALL(vec_cyl[2] - vec * e_z, eps); + // check transformation with orientation + { + auto const x_cyl = transform_coordinate_cartesian_to_cylinder(x, z, y); + auto const y_cyl = transform_coordinate_cartesian_to_cylinder(y, z, y); + auto const z_cyl = transform_coordinate_cartesian_to_cylinder(z, z, y); + auto const x_cart = transform_coordinate_cylinder_to_cartesian(x_cyl, z, y); + auto const y_cart = transform_coordinate_cylinder_to_cartesian(y_cyl, z, y); + auto const z_cart = transform_coordinate_cylinder_to_cartesian(z_cyl, z, y); + for (int i = 0; i < 3; ++i) { + BOOST_CHECK_SMALL(x_cart[i] - x[i], eps); + BOOST_CHECK_SMALL(y_cart[i] - y[i], eps); + BOOST_CHECK_SMALL(z_cart[i] - z[i], eps); + } + } + // check transformation with orientation for another angle + { + auto const u = vec_rotate(z, Utils::pi() / 3.0, x); + auto const v = vec_rotate(z, Utils::pi() / 3.0, y); + auto const u_cyl = transform_coordinate_cartesian_to_cylinder(u, z, y); + auto const v_cyl = transform_coordinate_cartesian_to_cylinder(v, z, y); + auto const u_cart = transform_coordinate_cylinder_to_cartesian(u_cyl, z, y); + auto const v_cart = transform_coordinate_cylinder_to_cartesian(v_cyl, z, y); + for (int i = 0; i < 3; ++i) { + BOOST_CHECK_SMALL(u_cart[i] - u[i], eps); + BOOST_CHECK_SMALL(v_cart[i] - v[i], eps); + } + } + // check transformation of random vectors + { + std::subtract_with_carry_engine rng(2); + auto const r_uniform = [&rng]() { + return static_cast(rng() - rng.min()) / (rng.max() - rng.min()); + }; + for (int trial = 0; trial < 100; ++trial) { + Vector3d const v1{r_uniform(), r_uniform(), r_uniform()}; + Vector3d const v2{r_uniform(), r_uniform(), r_uniform()}; + auto const a = Utils::vector_product(v1, v2) / v1.norm() / v2.norm(); + auto const v3 = transform_coordinate_cartesian_to_cylinder(v2, a, v1); + auto const v4 = transform_coordinate_cylinder_to_cartesian(v3, a, v1); + for (int i = 0; i < 3; ++i) { 
+ BOOST_CHECK_SMALL(v4[i] - v2[i], eps); + } + } + } } diff --git a/src/utils/tests/gather_buffer_test.cpp b/src/utils/tests/gather_buffer_test.cpp index 11036c3d6e8..cfca73b204e 100644 --- a/src/utils/tests/gather_buffer_test.cpp +++ b/src/utils/tests/gather_buffer_test.cpp @@ -37,39 +37,6 @@ using Utils::Mpi::gather_buffer; namespace mpi = boost::mpi; -void check_pointer(const mpi::communicator &comm, int root) { - if (comm.rank() == root) { - auto const n = comm.size(); - const int total_size = n * (n + 1) / 2; - - std::vector buf(total_size, comm.rank() + 1); - auto const ret_size = - gather_buffer(buf.data(), comm.rank() + 1, comm, root); - - BOOST_CHECK(ret_size == total_size); - - /* Check order in result */ - BOOST_CHECK(std::is_sorted(buf.begin(), buf.end())); - - /* Check values */ - for (int i = 1; i <= n; i++) { - std::vector::iterator lower, upper; - std::tie(lower, upper) = std::equal_range(buf.begin(), buf.end(), i); - - BOOST_CHECK(i == std::distance(lower, upper)); - } - } else { - std::vector buf(comm.rank() + 1, comm.rank() + 1); - gather_buffer(buf.data(), buf.size(), comm, root); - - /* Check that buffer is unchanged */ - BOOST_CHECK(buf.size() == comm.rank() + 1); - for (auto const &i : buf) { - BOOST_CHECK(i == comm.rank() + 1); - } - } -} - void check_vector(const mpi::communicator &comm, int root) { std::vector buf(comm.rank() + 1, comm.rank() + 1); @@ -123,28 +90,6 @@ void check_vector_out_of_bounds(const mpi::communicator &comm) { } } -void check_pointer_out_of_bounds(const mpi::communicator &comm) { - /* Check that moving data in the buffer on the root doesn't lead - * to an access out of bounds (using a sentinel value) */ - const auto root = 1; - if (comm.rank() == 1) { - std::vector buf = {2, 2, 0, -1}; - gather_buffer(buf.data(), 2, comm, root); - BOOST_CHECK(buf.size() == 4); - BOOST_CHECK(buf[0] == 1); - BOOST_CHECK(buf[1] == 2); - BOOST_CHECK(buf[2] == 2); - BOOST_CHECK(buf[3] == -1); - } else if (comm.rank() == 0) { - std::vector 
buf = {1}; - gather_buffer(buf.data(), 1, comm, root); - BOOST_CHECK(buf[0] == 1); - } else { - std::vector buf = {}; - gather_buffer(buf.data(), 0, comm, root); - } -} - void check_vector_empty(const mpi::communicator &comm, int empty) { std::vector buf((comm.rank() == empty) ? 0 : 11, comm.rank()); gather_buffer(buf, comm); @@ -165,54 +110,6 @@ void check_vector_empty(const mpi::communicator &comm, int empty) { } } -void check_pointer_empty(const mpi::communicator &comm, int empty) { - auto const n_elem = (comm.rank() == empty) ? 0 : 11; - std::vector buf(n_elem, comm.rank()); - - if (comm.rank() == 0) { - buf.resize((comm.size() - 1) * 11); - } - - gather_buffer(buf.data(), n_elem, comm); - - if (comm.rank() == 0) { - for (int i = 0; i < comm.size(); i++) { - std::vector::iterator lower, upper; - std::tie(lower, upper) = std::equal_range(buf.begin(), buf.end(), i); - - if (i == empty) { - BOOST_CHECK(0 == std::distance(lower, upper)); - } else { - BOOST_CHECK(11 == std::distance(lower, upper)); - } - } - } -} - -BOOST_AUTO_TEST_CASE(pointer) { - mpi::communicator world; - check_pointer(world, 0); -} - -BOOST_AUTO_TEST_CASE(pointer_overlap) { - mpi::communicator world; - if (world.size() >= 2) - check_pointer(world, 1); -} - -BOOST_AUTO_TEST_CASE(pointer_out_of_bounds) { - mpi::communicator world; - if (world.size() >= 2) - check_pointer_out_of_bounds(world); -} - -BOOST_AUTO_TEST_CASE(pointer_root) { - mpi::communicator world; - - auto root = (world.size() >= 3) ? world.size() - 2 : world.size() - 1; - check_pointer(world, root); -} - BOOST_AUTO_TEST_CASE(vector) { mpi::communicator world; check_vector(world, 0); @@ -250,19 +147,6 @@ BOOST_AUTO_TEST_CASE(vector_empty_root) { check_vector_empty(world, root); } -BOOST_AUTO_TEST_CASE(pointer_empty) { - mpi::communicator world; - - check_pointer_empty(world, 0); -} - -BOOST_AUTO_TEST_CASE(pointer_empty_root) { - mpi::communicator world; - auto root = (world.size() >= 3) ? 
world.size() - 2 : world.size() - 1; - - check_pointer_empty(world, root); -} - BOOST_AUTO_TEST_CASE(non_trivial_type) { mpi::communicator world; diff --git a/src/utils/tests/matrix_test.cpp b/src/utils/tests/matrix_test.cpp index 8030490d2ba..d3a9533cd2f 100644 --- a/src/utils/tests/matrix_test.cpp +++ b/src/utils/tests/matrix_test.cpp @@ -1,6 +1,8 @@ /* * Copyright (C) 2018-2019 The ESPResSo project * + * This file is part of ESPResSo. + * * ESPResSo is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or diff --git a/src/utils/tests/orthonormal_vec_test.cpp b/src/utils/tests/orthonormal_vec_test.cpp new file mode 100644 index 00000000000..51f9679dfe2 --- /dev/null +++ b/src/utils/tests/orthonormal_vec_test.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2019 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#define BOOST_TEST_MODULE Utils::orthonormal_vec test +#define BOOST_TEST_DYN_LINK +#include + +#include +#include + +BOOST_AUTO_TEST_CASE(orthonormal_vec_test) { + constexpr auto eps = 1e-14; + + auto const v0 = Utils::Vector3d{{1.1, -2.2, 3.3}}; + auto v0_orth = Utils::calc_orthonormal_vector(v0); + BOOST_CHECK_SMALL(v0 * v0_orth, eps); + BOOST_CHECK_SMALL(1 - v0_orth.norm(), eps); + + auto const v1 = Utils::VectorXd<2>{{1., 0.}}; + auto v1_orth = Utils::calc_orthonormal_vector(v1); + BOOST_CHECK_SMALL(v1 * v1_orth, eps); + BOOST_CHECK_SMALL(1 - v1_orth.norm(), eps); +} \ No newline at end of file diff --git a/src/utils/tests/vec_rotate_test.cpp b/src/utils/tests/vec_rotate_test.cpp index 18b9b14ef45..f1d5727a2cd 100644 --- a/src/utils/tests/vec_rotate_test.cpp +++ b/src/utils/tests/vec_rotate_test.cpp @@ -22,11 +22,9 @@ #include #include #include -using Utils::vec_rotate; #include #include -#include BOOST_AUTO_TEST_CASE(rotation) { using std::cos; @@ -43,20 +41,16 @@ BOOST_AUTO_TEST_CASE(rotation) { auto const expected = cos(t) * v + sin(t) * vector_product(k, v) + (1. 
- cos(t)) * (k * v) * k; - auto const is = vec_rotate(k, t, v); + auto const is = Utils::vec_rotate(k, t, v); auto const rel_diff = (expected - is).norm() / expected.norm(); BOOST_CHECK(rel_diff < std::numeric_limits::epsilon()); } -BOOST_AUTO_TEST_CASE(rotation_params) { - Utils::Vector3d v1 = {1.0, 0.0, 0.0}; - Utils::Vector3d v2 = {1.0, 1.0, 0.0}; +BOOST_AUTO_TEST_CASE(angle_between) { + Utils::Vector3d const v1 = {1.0, 0.0, 0.0}; + Utils::Vector3d const v2 = {1.0, 1.0, 0.0}; - double angle; - Utils::Vector3d rotation_axis; - std::tie(angle, rotation_axis) = Utils::rotation_params(v1, v2); + auto const angle = Utils::angle_between(v1, v2); BOOST_CHECK_CLOSE(angle, Utils::pi() / 4.0, 1e-7); - BOOST_CHECK_SMALL((rotation_axis * v1), 1e-7); - BOOST_CHECK_SMALL((rotation_axis * v2), 1e-7); } diff --git a/testsuite/python/CMakeLists.txt b/testsuite/python/CMakeLists.txt index ec8ff4c9dd6..f13ae95562d 100644 --- a/testsuite/python/CMakeLists.txt +++ b/testsuite/python/CMakeLists.txt @@ -57,7 +57,7 @@ endfunction(PYTHON_TEST) # Separate features with hyphens, use a period to add an optional flag. 
foreach( TEST_COMBINATION - lb.cpu-p3m.cpu-lj-therm.lb;lb.gpu-p3m.cpu-lj-therm.lb;ek.gpu;lb.off-therm.npt-int.npt;lb.off-int.sd;lb.off-therm.langevin-int.nvt;lb.off-therm.dpd-int.nvt;lb.off-therm.bd-int.bd;lb.off-therm.sdm-int.sdm + lb.cpu-p3m.cpu-lj-therm.lb;lb.gpu-p3m.elc-lj-therm.lb;ek.gpu;lb.off-therm.npt-int.npt;lb.off-int.sd;lb.off-dp3m.cpu-therm.langevin-int.nvt;lb.off-therm.dpd-int.nvt;lb.off-scafacos-therm.bd-int.bd;lb.off-therm.sdm-int.sdm ) if(${TEST_COMBINATION} MATCHES "\\.gpu") set(TEST_LABELS "gpu") @@ -125,9 +125,9 @@ python_test(FILE lb_stokes_sphere.py MAX_NUM_PROC 4 LABELS gpu long) python_test(FILE lb_pressure_tensor.py MAX_NUM_PROC 1 LABELS gpu long) python_test(FILE ek_fluctuations.py MAX_NUM_PROC 1 LABELS gpu) python_test(FILE ek_charged_plate.py MAX_NUM_PROC 1 LABELS gpu) -python_test(FILE ek_eof_one_species_x.py MAX_NUM_PROC 1 LABELS gpu) -python_test(FILE ek_eof_one_species_y.py MAX_NUM_PROC 1 LABELS gpu) -python_test(FILE ek_eof_one_species_z.py MAX_NUM_PROC 1 LABELS gpu) +python_test(FILE ek_eof_one_species.py MAX_NUM_PROC 1 LABELS gpu SUFFIX x) +python_test(FILE ek_eof_one_species.py MAX_NUM_PROC 1 LABELS gpu SUFFIX y) +python_test(FILE ek_eof_one_species.py MAX_NUM_PROC 1 LABELS gpu SUFFIX z) python_test(FILE exclusions.py MAX_NUM_PROC 2) python_test(FILE langevin_thermostat.py MAX_NUM_PROC 1) python_test(FILE langevin_thermostat_stats.py MAX_NUM_PROC 1 LABELS long) @@ -143,7 +143,7 @@ python_test(FILE integrator_npt_stats.py MAX_NUM_PROC 4 LABELS long) python_test(FILE integrator_steepest_descent.py MAX_NUM_PROC 4) python_test(FILE dipolar_mdlc_p3m_scafacos_p2nfft.py MAX_NUM_PROC 1) python_test(FILE dipolar_direct_summation.py MAX_NUM_PROC 1 LABELS gpu) -python_test(FILE dipolar_p3m.py MAX_NUM_PROC 1) +python_test(FILE dipolar_p3m.py MAX_NUM_PROC 2) python_test(FILE dipolar_interface.py MAX_NUM_PROC 1 LABELS gpu) python_test(FILE lb.py MAX_NUM_PROC 2 LABELS gpu) python_test(FILE lb_stats.py MAX_NUM_PROC 2 LABELS gpu long) @@ -207,7 
+207,7 @@ python_test(FILE sigint.py DEPENDENCIES sigint_child.py MAX_NUM_PROC 1) python_test(FILE lb_density.py MAX_NUM_PROC 1) python_test(FILE observable_chain.py MAX_NUM_PROC 4) python_test(FILE mpiio.py MAX_NUM_PROC 4) -python_test(FILE gpu_availability.py MAX_NUM_PROC 1 LABELS gpu) +python_test(FILE gpu_availability.py MAX_NUM_PROC 2 LABELS gpu) python_test(FILE features.py MAX_NUM_PROC 1) python_test(FILE galilei.py MAX_NUM_PROC 32) python_test(FILE linear_momentum.py MAX_NUM_PROC 4) @@ -222,6 +222,10 @@ python_test(FILE rotation.py MAX_NUM_PROC 1) python_test(FILE shapes.py MAX_NUM_PROC 1) python_test(FILE h5md.py MAX_NUM_PROC 2) python_test(FILE mdanalysis.py MAX_NUM_PROC 2) +python_test(FILE p3m_fft.py MAX_NUM_PROC 6) +if(${TEST_NP} GREATER_EQUAL 8) + python_test(FILE p3m_fft.py MAX_NUM_PROC 8 SUFFIX 8_cores) +endif() python_test(FILE p3m_tuning_exceptions.py MAX_NUM_PROC 1 LABELS gpu) python_test(FILE integrator_exceptions.py MAX_NUM_PROC 1) python_test(FILE utils.py MAX_NUM_PROC 1) @@ -244,10 +248,6 @@ add_custom_target( ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/thermostats_common.py - ${CMAKE_CURRENT_BINARY_DIR} - COMMAND - ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_SOURCE_DIR}/ek_eof_one_species_base.py ${CMAKE_CURRENT_BINARY_DIR}) add_custom_target( diff --git a/testsuite/python/cellsystem.py b/testsuite/python/cellsystem.py index 3725d3c7434..a5a2b5358f9 100644 --- a/testsuite/python/cellsystem.py +++ b/testsuite/python/cellsystem.py @@ -24,6 +24,7 @@ class CellSystem(ut.TestCase): system = espressomd.System(box_l=[5.0, 5.0, 5.0]) system.cell_system.skin = 0.0 + n_nodes = system.cell_system.get_state()['n_nodes'] def test_cell_system(self): self.system.cell_system.set_n_square(use_verlet_lists=False) @@ -34,15 +35,15 @@ def test_cell_system(self): self.assertEqual( [s['use_verlet_list'], s['type']], [1, "domain_decomposition"]) + @ut.skipIf(n_nodes == 1, "Skipping test: only runs for n_nodes >= 2") def 
test_node_grid(self): self.system.cell_system.set_domain_decomposition() - n_nodes = self.system.cell_system.get_state()['n_nodes'] - if n_nodes == 1: - return - self.system.cell_system.node_grid = [n_nodes, 1, 1] - s = self.system.cell_system.get_state() - np.testing.assert_array_equal( - s['node_grid'], [n_nodes, 1, 1]) + for i in range(3): + node_grid_ref = [1, 1, 1] + node_grid_ref[i] = self.n_nodes + self.system.cell_system.node_grid = node_grid_ref + node_grid = self.system.cell_system.get_state()['node_grid'] + np.testing.assert_array_equal(node_grid, node_grid_ref) if __name__ == "__main__": diff --git a/testsuite/python/collision_detection.py b/testsuite/python/collision_detection.py index 42edbf0ac28..9ef1851b252 100644 --- a/testsuite/python/collision_detection.py +++ b/testsuite/python/collision_detection.py @@ -63,9 +63,9 @@ def test_00_interface_and_defaults(self): self.s.collision_detection.mode = "bind_centers" # Verify exception throwing for unknown collision modes - with self.assertRaises(Exception): - self.s.collision_detection.set_params(mode=0) - self.s.collision_detection.set_params(mode="blahblah") + for unknown_mode in (0, "unknown"): + with self.assertRaisesRegex(Exception, "Mode not handled"): + self.s.collision_detection.set_params(mode=unknown_mode) # That should work self.s.collision_detection.set_params(mode="off") @@ -496,7 +496,12 @@ def test_glue_to_surface_random(self): # Collision detection self.s.collision_detection.set_params( - mode="glue_to_surface", distance=0.11, distance_glued_particle_to_vs=0.02, bond_centers=self.H, bond_vs=self.H2, part_type_vs=self.part_type_vs, part_type_to_attach_vs_to=self.part_type_to_attach_vs_to, part_type_to_be_glued=self.part_type_to_be_glued, part_type_after_glueing=self.part_type_after_glueing) + mode="glue_to_surface", distance=0.11, + distance_glued_particle_to_vs=0.02, bond_centers=self.H, + bond_vs=self.H2, part_type_vs=self.part_type_vs, + 
part_type_to_attach_vs_to=self.part_type_to_attach_vs_to, + part_type_to_be_glued=self.part_type_to_be_glued, + part_type_after_glueing=self.part_type_after_glueing) self.get_state_set_state_consistency() # Integrate lj liquid @@ -703,7 +708,7 @@ def verify_triangle_binding(self, distance, first_bond, angle_res): expected_angle_bonds = sorted(expected_angle_bonds) self.assertEqual(expected_pairs, found_pairs) - if not expected_angle_bonds == found_angle_bonds: + if expected_angle_bonds != found_angle_bonds: # Verbose info print("expected:", expected_angle_bonds) missing = [] diff --git a/testsuite/python/constraint_shape_based.py b/testsuite/python/constraint_shape_based.py index a58b9fbf610..f3ea78b9456 100644 --- a/testsuite/python/constraint_shape_based.py +++ b/testsuite/python/constraint_shape_based.py @@ -107,6 +107,50 @@ def z(y, r1, r2, l): return l / (r1 - r2) * \ self.assertLess(shape.calc_distance( position=[0.0, R1 - (0.5 + sys.float_info.epsilon) * D, 0.25 * LENGTH])[0], 0.0) + def test_simplepore(self): + """ + Test implementation of simplepore shape. 
+ + """ + RADIUS = 12.5 + LENGTH = 15.0 + CENTER = 3 * [self.box_l / 2] + AXIS = [1, 0, 0] + SRADIUS = 2 + + shape = espressomd.shapes.SimplePore( + center=CENTER, axis=AXIS, length=LENGTH, radius=RADIUS, + smoothing_radius=SRADIUS) + + # check distances inside cylinder + for x in np.linspace(self.box_l / 2 - LENGTH / 2 + SRADIUS, + self.box_l / 2 + LENGTH / 2 - SRADIUS, 10): + for y in np.linspace(0, RADIUS, 5): + dist = shape.calc_distance( + position=[x, self.box_l / 2 + y, self.box_l / 2]) + self.assertAlmostEqual(dist[0], RADIUS - y) + + # check distances near the walls + for y in np.linspace(0, self.box_l / 2 - RADIUS - SRADIUS, 6): + for z in np.linspace(0, self.box_l / 2 - RADIUS - SRADIUS, 6): + for x in np.linspace(0, self.box_l / 2 - LENGTH / 2, 6): + dist_to_x = (self.box_l / 2 - LENGTH / 2 - x) + dist = shape.calc_distance( + position=[x, y, self.box_l - z]) + np.testing.assert_almost_equal( + np.copy(dist[1]), [-dist_to_x, 0, 0]) + dist = shape.calc_distance( + position=[self.box_l - x, self.box_l - y, z]) + np.testing.assert_almost_equal( + np.copy(dist[1]), [dist_to_x, 0, 0]) + + # check getters + self.assertAlmostEqual(shape.radius, RADIUS) + self.assertAlmostEqual(shape.length, LENGTH) + self.assertAlmostEqual(shape.smoothing_radius, SRADIUS) + np.testing.assert_almost_equal(np.copy(shape.axis), AXIS) + np.testing.assert_almost_equal(np.copy(shape.center), CENTER) + def test_sphere(self): """Checks geometry of an inverted sphere @@ -205,6 +249,8 @@ def test_ellipsoid(self): # change ellipsoid parameters instead of creating a new constraint e.a = 1. e.b = 1. + self.assertAlmostEqual(e.a, 1.) + self.assertAlmostEqual(e.b, 1.) 
radii = np.linspace(1., 6.5, 7) @@ -331,6 +377,18 @@ def test_cylinder(self): dist = -distance self.assertAlmostEqual(shape_dist, dist) + + # check getters + self.assertAlmostEqual(cylinder_shape_finite.radius, rad) + self.assertAlmostEqual(cylinder_shape_finite.length, length) + np.testing.assert_almost_equal( + np.copy(cylinder_shape_finite.axis), [0, 0, 1]) + np.testing.assert_almost_equal( + np.copy(cylinder_shape_finite.center), 3 * [rad]) + self.assertFalse(cylinder_shape_finite.open) + cylinder_shape_finite.open = True + self.assertTrue(cylinder_shape_finite.open) + # Reset system.non_bonded_inter[0, 1].lennard_jones.set_params( epsilon=0.0, sigma=0.0, cutoff=0.0, shift=0) @@ -432,6 +490,14 @@ def test_spherocylinder(self): energy = system.analysis.energy() self.assertAlmostEqual(energy["total"], 10. - r) + # check getters + self.assertAlmostEqual(spherocylinder_shape.radius, 10.) + self.assertAlmostEqual(spherocylinder_shape.length, 6.0) + np.testing.assert_almost_equal( + np.copy(spherocylinder_shape.axis), [0, 1, 0]) + np.testing.assert_almost_equal( + np.copy(spherocylinder_shape.center), 3 * [self.box_l / 2.0]) + # Reset system.non_bonded_inter[0, 1].generic_lennard_jones.set_params( epsilon=0., sigma=0., cutoff=0., shift=0., offset=0., e1=0, e2=0, b1=0., b2=0.) 
@@ -866,6 +932,13 @@ def test_torus(self): position=phi_rot_point.tolist()) self.assertAlmostEqual(shape_dist, distance) + # check getters + self.assertAlmostEqual(torus_shape.radius, radius) + self.assertAlmostEqual(torus_shape.tube_radius, tube_radius) + np.testing.assert_almost_equal(np.copy(torus_shape.normal), [0, 0, 1]) + np.testing.assert_almost_equal( + np.copy(torus_shape.center), 3 * [self.box_l / 2.0]) + # Reset system.non_bonded_inter[0, 1].lennard_jones.set_params( epsilon=0.0, sigma=0.0, cutoff=0.0, shift=0) diff --git a/testsuite/python/coulomb_cloud_wall.py b/testsuite/python/coulomb_cloud_wall.py index 494d66418bb..2fd2ce38ed5 100644 --- a/testsuite/python/coulomb_cloud_wall.py +++ b/testsuite/python/coulomb_cloud_wall.py @@ -115,41 +115,6 @@ def test_p3m_gpu(self): self.S.integrator.run(0) self.compare("p3m_gpu", energy=False, prefactor=2.2) - @ut.skipIf(not espressomd.has_features(["SCAFACOS"]) - or 'p3m' not in scafacos.available_methods(), - 'Skipping test: missing feature SCAFACOS or p3m method') - def test_scafacos_p3m(self): - self.S.actors.add( - espressomd.electrostatics.Scafacos( - prefactor=0.5, - method_name="p3m", - method_params={ - "p3m_r_cut": 1.001, - "p3m_grid": 64, - "p3m_cao": 7, - "p3m_alpha": 2.70746})) - self.S.integrator.run(0) - self.compare("scafacos_p3m", energy=True, prefactor=0.5) - - @ut.skipIf(not espressomd.has_features(["SCAFACOS"]) - or 'p3m' not in scafacos.available_methods(), - 'Skipping test: missing feature SCAFACOS or p3m method') - def test_scafacos_p3m_tuning(self): - # check that the tuning function can be called without throwing - # an exception or causing an MPI deadlock - self.S.actors.add( - espressomd.electrostatics.Scafacos( - prefactor=0.5, - method_name="p3m", - method_params={ - "p3m_r_cut": -1.5, - "p3m_grid": 64, - "p3m_cao": 7, - "p3m_alpha": 2.70746})) - self.S.integrator.run(0) - # check the scafacos script interface - self.assertEqual(self.S.actors[-1].get_params()['prefactor'], 0.5) - 
@ut.skipIf(not espressomd.has_features("SCAFACOS") or 'p2nfft' not in scafacos.available_methods(), 'Skipping test: missing feature SCAFACOS or p2nfft method') diff --git a/testsuite/python/coulomb_mixed_periodicity.py b/testsuite/python/coulomb_mixed_periodicity.py index c275d3a9f7e..30f84005926 100644 --- a/testsuite/python/coulomb_mixed_periodicity.py +++ b/testsuite/python/coulomb_mixed_periodicity.py @@ -20,7 +20,7 @@ import unittest_decorators as utx import numpy as np import espressomd -from espressomd import electrostatics, electrostatic_extensions, scafacos +from espressomd import electrostatics, scafacos import tests_common @@ -44,7 +44,6 @@ def setUp(self): self.S.box_l = (10, 10, 10) self.S.time_step = 0.01 self.S.cell_system.skin = 0. - self.S.actors.clear() data = np.genfromtxt(tests_common.abspath( "data/coulomb_mixed_periodicity_system.data")) @@ -61,6 +60,7 @@ def setUp(self): def tearDown(self): self.S.part.clear() + self.S.actors.clear() def compare(self, method_name, energy=True): # Compare forces and energy now in the system to stored ones @@ -84,7 +84,7 @@ def compare(self, method_name, energy=True): # Tests for individual methods @utx.skipIfMissingFeatures(["P3M"]) - def test_zz_p3mElc(self): + def test_elc(self): # Make sure, the data satisfies the gap for p in self.S.part: if p.pos[2] < 0 or p.pos[2] > 9.: @@ -97,13 +97,11 @@ def test_zz_p3mElc(self): self.S.box_l = (10, 10, 10) p3m = electrostatics.P3M(prefactor=1, accuracy=1e-6, mesh=(64, 64, 64)) + elc = electrostatics.ELC(p3m_actor=p3m, maxPWerror=1E-6, gap_size=1) - self.S.actors.add(p3m) - elc = electrostatic_extensions.ELC(maxPWerror=1E-6, gap_size=1) self.S.actors.add(elc) self.S.integrator.run(0) self.compare("elc", energy=True) - self.S.actors.remove(p3m) @ut.skipIf(not espressomd.has_features("SCAFACOS") or 'p2nfft' not in scafacos.available_methods(), @@ -125,7 +123,6 @@ def test_scafacos_p2nfft(self): self.S.actors.add(scafacos) self.S.integrator.run(0) 
self.compare("scafacos_p2nfft", energy=True) - self.S.actors.remove(scafacos) if __name__ == "__main__": diff --git a/testsuite/python/dawaanr-and-bh-gpu.py b/testsuite/python/dawaanr-and-bh-gpu.py index 9be6a09c992..bc7a6b6f5dc 100644 --- a/testsuite/python/dawaanr-and-bh-gpu.py +++ b/testsuite/python/dawaanr-and-bh-gpu.py @@ -17,16 +17,13 @@ import unittest as ut import unittest_decorators as utx import numpy as np +import tests_common import espressomd import espressomd.magnetostatics import espressomd.analyze import espressomd.cuda_init - - -def stopAll(system): - system.part[:].v = np.zeros(3) - system.part[:].omega_body = np.zeros(3) +import espressomd.galilei @utx.skipIfMissingGPU() @@ -48,34 +45,22 @@ def test(self): pf_bh_gpu = 2.34 pf_dawaanr = 3.524 ratio_dawaanr_bh_gpu = pf_dawaanr / pf_bh_gpu - l = 15 - self.system.box_l = [l, l, l] + self.system.box_l = 3 * [15] self.system.periodicity = [0, 0, 0] self.system.time_step = 1E-4 self.system.cell_system.skin = 0.1 - part_dip = np.zeros((3)) - for n in [128, 541]: dipole_modulus = 1.3 - # scale the box for a large number of particles: - if n > 1000: - l *= (n / 541) ** (1 / 3.0) - for i in range(n): - part_pos = np.array(np.random.random(3)) * l - costheta = 2 * np.random.random() - 1 - sintheta = np.sin(np.arcsin(costheta)) - phi = 2 * np.pi * np.random.random() - part_dip[0] = sintheta * np.cos(phi) * dipole_modulus - part_dip[1] = sintheta * np.sin(phi) * dipole_modulus - part_dip[2] = costheta * dipole_modulus - self.system.part.add(id=i, type=0, pos=part_pos, dip=part_dip, - v=np.array([0, 0, 0]), omega_body=np.array([0, 0, 0])) + part_dip = dipole_modulus * tests_common.random_dipoles(n) + part_pos = np.random.random((n, 3)) * self.system.box_l[0] + self.system.part.add(pos=part_pos, dip=part_dip) self.system.non_bonded_inter[0, 0].lennard_jones.set_params( epsilon=10.0, sigma=0.5, cutoff=0.55, shift="auto") self.system.thermostat.set_langevin(kT=0.0, gamma=10.0, seed=42) - stopAll(self.system) + g = 
espressomd.galilei.GalileiTransform() + g.kill_particle_motion(rotation=True) self.system.integrator.set_vv() self.system.non_bonded_inter[0, 0].lennard_jones.set_params( diff --git a/testsuite/python/dawaanr-and-dds-gpu.py b/testsuite/python/dawaanr-and-dds-gpu.py index d613dff3bf5..3e67db7e514 100644 --- a/testsuite/python/dawaanr-and-dds-gpu.py +++ b/testsuite/python/dawaanr-and-dds-gpu.py @@ -16,13 +16,14 @@ # along with this program. If not, see . import unittest as ut import unittest_decorators as utx -from numpy.random import random +import tests_common import numpy as np import espressomd import espressomd.interactions import espressomd.magnetostatics import espressomd.analyze +import espressomd.galilei @utx.skipIfMissingGPU() @@ -31,37 +32,22 @@ class DDSGPUTest(ut.TestCase): # Handle for espresso system es = espressomd.System(box_l=[1.0, 1.0, 1.0]) - def stopAll(self): - for i in range(len(self.es.part)): - self.es.part[i].v = np.array([0.0, 0.0, 0.0]) - self.es.part[i].omega_body = np.array([0.0, 0.0, 0.0]) - @ut.skipIf(es.cell_system.get_state()["n_nodes"] > 1, "Skipping test: only runs for n_nodes == 1") def test(self): pf_dds_gpu = 2.34 pf_dawaanr = 3.524 ratio_dawaanr_dds_gpu = pf_dawaanr / pf_dds_gpu - l = 15 - self.es.box_l = [l, l, l] + self.es.box_l = 3 * [15] self.es.periodicity = [0, 0, 0] self.es.time_step = 1E-4 self.es.cell_system.skin = 0.1 - part_dip = np.zeros((3)) - for n in [128, 541]: dipole_modulus = 1.3 - for i in range(n): - part_pos = np.array(random(3)) * l - costheta = 2 * random() - 1 - sintheta = np.sin(np.arcsin(costheta)) - phi = 2 * np.pi * random() - part_dip[0] = sintheta * np.cos(phi) * dipole_modulus - part_dip[1] = sintheta * np.sin(phi) * dipole_modulus - part_dip[2] = costheta * dipole_modulus - self.es.part.add(id=i, type=0, pos=part_pos, dip=part_dip, - v=np.array([0, 0, 0]), omega_body=np.array([0, 0, 0])) + part_dip = dipole_modulus * tests_common.random_dipoles(n) + part_pos = np.random.random((n, 3)) * 
self.es.box_l[0] + self.es.part.add(pos=part_pos, dip=part_dip) self.es.non_bonded_inter[0, 0].lennard_jones.set_params( epsilon=10.0, sigma=0.5, cutoff=0.55, shift="auto") @@ -70,7 +56,8 @@ def test(self): self.es.integrator.set_steepest_descent( f_max=0.0, gamma=0.1, max_displacement=0.1) self.es.integrator.run(500) - self.stopAll() + g = espressomd.galilei.GalileiTransform() + g.kill_particle_motion(rotation=True) self.es.integrator.set_vv() self.es.non_bonded_inter[0, 0].lennard_jones.set_params( diff --git a/testsuite/python/dds-and-bh-gpu.py b/testsuite/python/dds-and-bh-gpu.py index e473e35b818..8aa23acee1b 100644 --- a/testsuite/python/dds-and-bh-gpu.py +++ b/testsuite/python/dds-and-bh-gpu.py @@ -23,11 +23,7 @@ import espressomd.magnetostatics import espressomd.analyze import espressomd.cuda_init - - -def stopAll(system): - system.part[:].v = np.zeros(3) - system.part[:].omega_body = np.zeros(3) +import espressomd.galilei @utx.skipIfMissingGPU() @@ -70,7 +66,8 @@ def test(self): self.system.non_bonded_inter[0, 0].lennard_jones.set_params( epsilon=10.0, sigma=0.5, cutoff=0.55, shift="auto") self.system.thermostat.set_langevin(kT=0.0, gamma=10.0, seed=42) - stopAll(self.system) + g = espressomd.galilei.GalileiTransform() + g.kill_particle_motion(rotation=True) self.system.integrator.set_vv() self.system.non_bonded_inter[0, 0].lennard_jones.set_params( diff --git a/testsuite/python/dipolar_direct_summation.py b/testsuite/python/dipolar_direct_summation.py index dd41b43b4bb..52de4e0f5c4 100644 --- a/testsuite/python/dipolar_direct_summation.py +++ b/testsuite/python/dipolar_direct_summation.py @@ -19,10 +19,11 @@ import espressomd import espressomd.magnetostatics import espressomd.magnetostatic_extensions +import os import numpy as np import unittest as ut -from tests_common import abspath import unittest_decorators as utx +from tests_common import abspath, random_dipoles OPEN_BOUNDARIES_REF_ENERGY = abspath("data/dipolar_open_boundaries_energy.npy") 
OPEN_BOUNDARIES_REF_ARRAYS = abspath("data/dipolar_open_boundaries_arrays.npy") @@ -34,7 +35,7 @@ class dds(ut.TestCase): system.time_step = 0.01 system.cell_system.skin = 0.1 - system.periodicity = 0, 0, 0 + system.periodicity = [False, False, False] def tearDown(self): self.system.part.clear() @@ -107,19 +108,27 @@ def fcs_data(self): @ut.skipIf(system.cell_system.get_state()["n_nodes"] > 1, "Skipping test: only runs for n_nodes == 1") - def gen_reference_data(self): + def test_gen_reference_data(self): + filepaths = ('dipolar_direct_summation_energy.npy', + 'dipolar_direct_summation_arrays.npy') + for filepath in filepaths: + if os.path.isfile(filepath): + os.remove(filepath) + + self.gen_reference_data(filepaths[0], filepaths[1]) + for filepath in filepaths: + self.assertTrue(os.path.isfile(filepath)) + + def gen_reference_data(self, filepath_energy=OPEN_BOUNDARIES_REF_ENERGY, + filepath_arrays=OPEN_BOUNDARIES_REF_ARRAYS): system = self.system + np.random.seed(42) # add particles N = 20 dipole_modulus = 1.3 part_pos = np.random.random((N, 3)) * system.box_l - costheta = 2 * np.random.random(N) - 1 - sintheta = np.sin(np.arcsin(costheta)) - phi = 2 * np.pi * np.random.random(N) - part_dip = np.array([sintheta * np.cos(phi) * dipole_modulus, - sintheta * np.sin(phi) * dipole_modulus, - costheta * dipole_modulus]).T + part_dip = dipole_modulus * random_dipoles(N) particles = system.part.add(pos=part_pos, dip=part_dip, rotation=N * [(1, 1, 1)]) @@ -137,12 +146,11 @@ def gen_reference_data(self): # compute forces and energies for dawaanr ref_e, ref_f, ref_t = self.dds_data() np.save( - OPEN_BOUNDARIES_REF_ENERGY, - np.array( - [ref_e]), + filepath_energy, + np.array([ref_e]), allow_pickle=False) np.save( - OPEN_BOUNDARIES_REF_ARRAYS, + filepath_arrays, np.hstack( (particles.pos_folded, particles.dip, @@ -190,7 +198,9 @@ def test_dds_gpu(self): force_tol=1E-4, torque_tol=1E-4) - @utx.skipIfMissingFeatures("SCAFACOS_DIPOLES") + @ut.skipIf(not 
espressomd.has_features("SCAFACOS_DIPOLES") or + "direct" not in espressomd.scafacos.available_methods(), + "Skipping test: missing SCAFACOS_DIPOLES or 'direct' method") def test_dds_scafacos(self): self.check_open_bc( self.fcs_data, diff --git a/testsuite/python/dipolar_interface.py b/testsuite/python/dipolar_interface.py index 5cf6d5baf2f..33c06aaf822 100644 --- a/testsuite/python/dipolar_interface.py +++ b/testsuite/python/dipolar_interface.py @@ -30,8 +30,8 @@ class MagnetostaticsInterface(ut.TestCase): def setUp(self): self.system.box_l = [10., 10., 10.] - self.system.part.add(id=0, pos=(0.1, 0.1, 0.1), dip=(1.3, 2.1, -6)) - self.system.part.add(id=1, pos=(0, 0, 0), dip=(7.3, 6.1, -4)) + self.system.part.add(pos=(0.0, 0.0, 0.0), dip=(1.3, 2.1, -6)) + self.system.part.add(pos=(0.1, 0.1, 0.1), dip=(7.3, 6.1, -4)) def tearDown(self): self.system.part.clear() diff --git a/testsuite/python/dipolar_p3m.py b/testsuite/python/dipolar_p3m.py index 1001f9e46e5..373885a236a 100644 --- a/testsuite/python/dipolar_p3m.py +++ b/testsuite/python/dipolar_p3m.py @@ -22,6 +22,7 @@ import numpy as np import espressomd.magnetostatics +import espressomd.magnetostatic_extensions @utx.skipIfMissingFeatures(["DP3M"]) @@ -30,33 +31,13 @@ class MagnetostaticsP3M(ut.TestCase): def setUp(self): self.system.box_l = [10., 10., 10.] - self.system.part.add(id=0, pos=(0.1, 0.1, 0.1), dip=(1.3, 2.1, -6)) - self.system.part.add(id=1, pos=(0, 0, 0), dip=(7.3, 6.1, -4)) + self.system.part.add(id=0, pos=[4.0, 2.0, 2.0], dip=(1.3, 2.1, -6)) + self.system.part.add(id=1, pos=[6.0, 2.0, 2.0], dip=(7.3, 6.1, -4)) def tearDown(self): self.system.part.clear() self.system.actors.clear() - def ref_values(self, epsilon=np.inf): - x = 1. 
/ (1 + 2 * epsilon) - dp3m_energy = 1.66706 * x + 1.673333 - dp3m_torque1 = np.array([-0.5706503 * x + 2.561371, - -0.1812375 * x + 10.394144, - -0.2976916 * x + 9.965342]) - dp3m_torque2 = np.array([+0.3362938 * x + 1.854679, - -0.2269749 * x - 3.638175, - +0.5315054 * x + 8.487292]) - dp3m_force = np.array([-3.54175042, -4.6761059, 9.96632774]) - alpha, r_cut, mesh, cao = (9.056147262573242, 4.739799499511719, 49, 7) - dp3m_params = {'prefactor': 1.1, 'accuracy': 9.995178689932661e-07, - 'mesh': mesh, 'mesh_off': [0.5, 0.5, 0.5], - 'cao': cao, 'additional_mesh': [0.0, 0.0, 0.0], - 'alpha': alpha / 10, 'alpha_L': alpha, 'r_cut': r_cut, - 'r_cut_iL': r_cut / self.system.box_l[0], - 'cao_cut': 3 * [self.system.box_l[0] / mesh / 2 * cao], - 'a': 3 * [self.system.box_l[0] / mesh]} - return dp3m_params, dp3m_energy, dp3m_force, dp3m_torque1, dp3m_torque2 - if espressomd.has_features("DP3M"): test_DP3M = tests_common.generate_test_for_class( system, espressomd.magnetostatics.DipolarP3M, @@ -65,48 +46,95 @@ def ref_values(self, epsilon=np.inf): def test_dp3m(self): self.system.time_step = 0.01 - self.system.part[0].pos = [1.0, 2.0, 2.0] - self.system.part[1].pos = [3.0, 2.0, 2.0] - dp3m_params, dp3m_energy, dp3m_force, dp3m_torque1, dp3m_torque2 = self.ref_values() - dp3m = espressomd.magnetostatics.DipolarP3M(tune=False, **dp3m_params) + prefactor = 1.1 + box_vol = self.system.volume() + p1, p2 = self.system.part[:] + dip = np.copy(p1.dip + p2.dip) + dp3m_params = {'accuracy': 1e-6, + 'mesh': [49, 49, 49], + 'cao': 7, + 'r_cut': 4.739799499511719, + 'alpha': 0.9056147262573242} + mdlc_params = {'maxPWerror': 1e-5, 'gap_size': 5.} + + # reference values for energy and force calculated for prefactor = 1.1 + ref_dp3m_energy = 1.673333 + ref_dp3m_force = np.array([-3.54175042, -4.6761059, 9.96632774]) + ref_dp3m_torque1 = np.array([-3.29316117, -13.21245739, -5.33787892]) + ref_dp3m_torque2 = np.array([3.98103932, -7.47123148, -4.12823244]) + + # check metallic case + dp3m 
= espressomd.magnetostatics.DipolarP3M( + prefactor=prefactor, epsilon='metallic', tune=False, **dp3m_params) self.system.actors.add(dp3m) - self.assertAlmostEqual(self.system.analysis.energy()['dipolar'], - dp3m_energy, places=5) - # update forces and torques - self.system.integrator.run(0) - np.testing.assert_allclose(np.copy(self.system.part[0].f), - dp3m_force, atol=1E-5) - np.testing.assert_allclose(np.copy(self.system.part[1].f), - -dp3m_force, atol=1E-5) - np.testing.assert_allclose(np.copy(self.system.part[0].torque_lab), - dp3m_torque1, atol=1E-5) - np.testing.assert_allclose(np.copy(self.system.part[1].torque_lab), - dp3m_torque2, atol=1E-5) - - def test_dp3m_non_metallic(self): - self.system.time_step = 0.01 - self.system.part[0].pos = [1.0, 2.0, 2.0] - self.system.part[1].pos = [3.0, 2.0, 2.0] - for epsilon_power in range(-4, 5): - epsilon = 10**epsilon_power - dp3m_params, dp3m_energy, dp3m_force, dp3m_torque1, dp3m_torque2 = self.ref_values( - epsilon) + self.system.integrator.run(0, recalc_forces=True) + energy = self.system.analysis.energy()['dipolar'] + tol = 1e-5 + np.testing.assert_allclose(energy, ref_dp3m_energy, atol=tol) + np.testing.assert_allclose(np.copy(p1.f), ref_dp3m_force, atol=tol) + np.testing.assert_allclose(np.copy(p2.f), -ref_dp3m_force, atol=tol) + np.testing.assert_allclose( + np.copy(p1.convert_vector_space_to_body(p1.torque_lab)), + ref_dp3m_torque1, atol=tol) + np.testing.assert_allclose( + np.copy(p2.convert_vector_space_to_body(p2.torque_lab)), + ref_dp3m_torque2, atol=tol) + + # keep current values as reference to check for DP3M dipole correction + ref_dp3m_energy_metallic = self.system.analysis.energy()['dipolar'] + ref_dp3m_forces_metallic = np.copy(self.system.part[:].f) + ref_dp3m_torque_metallic = np.array([ + p1.convert_vector_space_to_body(p1.torque_lab), + p2.convert_vector_space_to_body(p2.torque_lab)]) + + # MDLC cancels out dipole correction + mdlc = espressomd.magnetostatic_extensions.DLC(**mdlc_params) + 
self.system.actors.add(mdlc) + + # keep current values as reference to check for MDLC dipole correction + self.system.integrator.run(0, recalc_forces=True) + ref_mdlc_energy_metallic = self.system.analysis.energy()['dipolar'] + ref_mdlc_forces_metallic = np.copy(self.system.part[:].f) + ref_mdlc_torque_metallic = np.copy(self.system.part[:].torque_lab) + self.system.actors.clear() + + # check non-metallic case + tol = 1e-10 + for epsilon in np.power(10., np.arange(-4, 5)): + dipole_correction = 4 * np.pi / box_vol / (1 + 2 * epsilon) + e_correction = dipole_correction / 2 * np.linalg.norm(dip)**2 + t_correction = np.cross([p1.dip, p2.dip], dipole_correction * dip) + ref_dp3m_energy = ref_dp3m_energy_metallic + prefactor * e_correction + ref_dp3m_forces = ref_dp3m_forces_metallic + ref_dp3m_torque = ref_dp3m_torque_metallic - prefactor * t_correction dp3m = espressomd.magnetostatics.DipolarP3M( - tune=False, epsilon=epsilon, **dp3m_params) + prefactor=prefactor, epsilon=epsilon, tune=False, **dp3m_params) self.system.actors.add(dp3m) - self.assertAlmostEqual(self.system.analysis.energy()['dipolar'], - dp3m_energy, places=5) - # update forces and torques - self.system.integrator.run(0) - np.testing.assert_allclose(np.copy(self.system.part[0].f), - dp3m_force, atol=1E-5) - np.testing.assert_allclose(np.copy(self.system.part[1].f), - -dp3m_force, atol=1E-5) - np.testing.assert_allclose(np.copy(self.system.part[0].torque_lab), - dp3m_torque1, atol=1E-5) - np.testing.assert_allclose(np.copy(self.system.part[1].torque_lab), - dp3m_torque2, atol=1E-5) - self.system.actors.remove(dp3m) + self.system.integrator.run(0, recalc_forces=True) + dp3m_forces = np.copy(self.system.part[:].f) + dp3m_torque = np.array([ + p1.convert_vector_space_to_body(p1.torque_lab), + p2.convert_vector_space_to_body(p2.torque_lab)]) + dp3m_energy = self.system.analysis.energy()['dipolar'] + np.testing.assert_allclose(dp3m_forces, ref_dp3m_forces, atol=tol) + np.testing.assert_allclose(dp3m_torque, 
ref_dp3m_torque, atol=tol) + np.testing.assert_allclose(dp3m_energy, ref_dp3m_energy, atol=tol) + + # MDLC cancels out dipole correction + ref_mdlc_energy = ref_mdlc_energy_metallic + ref_mdlc_forces = ref_mdlc_forces_metallic + ref_mdlc_torque = ref_mdlc_torque_metallic + mdlc = espressomd.magnetostatic_extensions.DLC(**mdlc_params) + self.system.actors.add(mdlc) + self.system.integrator.run(0, recalc_forces=True) + mdlc_forces = np.copy(self.system.part[:].f) + mdlc_torque = np.copy(self.system.part[:].torque_lab) + mdlc_energy = self.system.analysis.energy()['dipolar'] + np.testing.assert_allclose(mdlc_forces, ref_mdlc_forces, atol=tol) + np.testing.assert_allclose(mdlc_torque, ref_mdlc_torque, atol=tol) + np.testing.assert_allclose(mdlc_energy, ref_mdlc_energy, atol=tol) + + self.system.actors.clear() if __name__ == "__main__": diff --git a/testsuite/python/ek_common.py b/testsuite/python/ek_common.py index d13765b46f3..b6db63aed52 100644 --- a/testsuite/python/ek_common.py +++ b/testsuite/python/ek_common.py @@ -55,7 +55,7 @@ def pressure_tensor_offdiagonal(x, xi, bjerrum_length, force): # function to calculate the hydrostatic pressure -# Technically, the LB simulates a compressible fluid, whiches pressure +# Technically, the LB simulates a compressible fluid, whose pressure # tensor contains an additional term on the diagonal, proportional to # the divergence of the velocity. 
We neglect this contribution, which # creates a small error in the direction normal to the wall, which @@ -72,22 +72,3 @@ def hydrostatic_pressure( offset = ek[int(box_x / (2 * agrid)), int(box_y / (2 * agrid)), int(box_z / (2 * agrid))].pressure[tensor_entry] return 0.0 + offset - - -# variant from the nonlinear tests -def hydrostatic_pressure_non_lin( - ek, - x, - xi, - bjerrum_length, - tensor_entry, - box_x, - box_y, - box_z, - agrid, - temperature): - offset = ek[int(box_x / (2 * agrid)), int(box_y / (2 * agrid)), - int(box_z / (2 * agrid))].pressure[tensor_entry] - return temperature * xi * xi * \ - math.tan(xi * x) * math.tan(xi * x) / \ - (2.0 * math.pi * bjerrum_length) + offset diff --git a/testsuite/python/ek_eof_one_species_base.py b/testsuite/python/ek_eof_one_species.py similarity index 63% rename from testsuite/python/ek_eof_one_species_base.py rename to testsuite/python/ek_eof_one_species.py index a419a8596c2..e7c19634c5e 100644 --- a/testsuite/python/ek_eof_one_species_base.py +++ b/testsuite/python/ek_eof_one_species.py @@ -17,29 +17,40 @@ import unittest as ut import unittest_decorators as utx +import pathlib + import sys import math import numpy as np +try: + import vtk + from vtk.util import numpy_support as VN + skipIfMissingPythonPackage = utx.no_skip +except ImportError: + skipIfMissingPythonPackage = ut.skip( + "Python module vtk not available, skipping test!") + import espressomd import espressomd.electrokinetics import espressomd.shapes import ek_common -from tests_common import DynamicDict ########################################################################## # Set up the System # ########################################################################## -# Set the slit pore geometry the width is the non-periodic part of the geometry -# the padding is used to ensure that there is no field inside outside the slit +# Set the slit pore geometry. The width is the non-periodic part of the +# geometry. 
The padding is used to ensure that there is no field outside +# the slit. -params_base = DynamicDict([ +params_base = dict([ ('dt', 1.0 / 7), ('integration_length', 2300), ('agrid', 1. / 3), ('density_water', 26.15), ('friction', 1.9), ('width', 20.0), + ('thickness', 3.0), ('sigma', -0.04), ('padding', 6.0), ('force', 0.07), @@ -47,8 +58,53 @@ ('viscosity_kinematic', 1.7), ('bjerrum_length', 0.8), ('sigma', -0.04), - ('density_counterions', '-2.0 * sigma / width'), - ('valency', 1.0)]) + ('valency', 1.0), +]) +params_base['density_counterions'] = -2.0 * \ + params_base['sigma'] / params_base['width'] + +axis = "@TEST_SUFFIX@" +params = { + "x": dict([ + ('box_x', params_base['thickness']), + ('box_y', params_base['thickness']), + ('box_z', params_base['width'] + 2 * params_base['padding']), + ('ext_force_density', [params_base['force'], 0.0, 0.0]), + ('wall_normal_1', [0, 0, 1]), + ('wall_normal_2', [0, 0, -1]), + ('periodic_dirs', (0, 1)), + ('non_periodic_dir', 2), + ('n_roll_index', 0), + ('calculated_pressure_xy', 0.0), + ('calculated_pressure_yz', 0.0) + ]), + "y": dict([ + ('box_x', params_base['width'] + 2 * params_base['padding']), + ('box_y', params_base['thickness']), + ('box_z', params_base['thickness']), + ('ext_force_density', [0.0, params_base['force'], 0.0]), + ('wall_normal_1', [1, 0, 0]), + ('wall_normal_2', [-1, 0, 0]), + ('periodic_dirs', (1, 2)), + ('non_periodic_dir', 0), + ('n_roll_index', 1), + ('calculated_pressure_xz', 0.0), + ('calculated_pressure_yz', 0.0) + ]), + "z": dict([ + ('box_x', params_base['thickness']), + ('box_y', params_base['width'] + 2 * params_base['padding']), + ('box_z', params_base['thickness']), + ('ext_force_density', [0.0, 0.0, params_base['force']]), + ('wall_normal_1', [0, 1, 0]), + ('wall_normal_2', [0, -1, 0]), + ('periodic_dirs', (0, 2)), + ('non_periodic_dir', 1), + ('n_roll_index', 2), + ('calculated_pressure_xy', 0.0), + ('calculated_pressure_xz', 0.0) + ]) +}[axis] def bisection(): @@ -110,16 +166,28 @@ 
class ek_eof_one_species(ut.TestCase): system = espressomd.System(box_l=[1.0, 1.0, 1.0]) xi = bisection() - def run_test(self, params): - system = self.system + def parse_vtk(self, filepath, name, shape): + reader = vtk.vtkStructuredPointsReader() + reader.SetFileName(filepath) + reader.ReadAllVectorsOn() + reader.ReadAllScalarsOn() + reader.Update() + + data = reader.GetOutput() + points = data.GetPointData() + + return VN.vtk_to_numpy(points.GetArray(name)).reshape(shape, order='F') + + @classmethod + def setUpClass(cls): + system = cls.system system.box_l = [params['box_x'], params['box_y'], params['box_z']] system.time_step = params_base['dt'] - system.thermostat.turn_off() system.cell_system.skin = 0.1 system.thermostat.turn_off() # Set up the (LB) electrokinetics fluid - ek = espressomd.electrokinetics.Electrokinetics( + ek = cls.ek = espressomd.electrokinetics.Electrokinetics( agrid=params_base['agrid'], lb_density=params_base['density_water'], viscosity=params_base['viscosity_kinematic'], @@ -129,7 +197,7 @@ def run_test(self, params): params_base['temperature'], stencil="linkcentered") - counterions = espressomd.electrokinetics.Species( + counterions = cls.counterions = espressomd.electrokinetics.Species( density=params_base['density_counterions'], D=0.3, valency=params_base['valency'], @@ -156,6 +224,7 @@ def run_test(self, params): # Integrate the system system.integrator.run(params_base['integration_length']) + def test(self): # compare the various quantities to the analytic results total_velocity_difference = 0.0 total_density_difference = 0.0 @@ -166,6 +235,9 @@ def run_test(self, params): total_pressure_difference_yz = 0.0 total_pressure_difference_xz = 0.0 + system = self.system + ek = self.ek + counterions = self.counterions for i in range( int(system.box_l[params['non_periodic_dir']] / params_base['agrid'])): if (i * @@ -301,3 +373,90 @@ def run_test(self, params): "Pressure accuracy yz component not achieved") 
self.assertLess(total_pressure_difference_xz, 1.0e-04, "Pressure accuracy xz component not achieved") + + @skipIfMissingPythonPackage + def test_vtk(self): + ek = self.ek + counterions = self.counterions + grid_dims = list( + map(int, np.round(self.system.box_l / params_base['agrid']))) + + # write VTK files + vtk_root = f"vtk_out/ek_eof_{axis}" + pathlib.Path(vtk_root).mkdir(parents=True, exist_ok=True) + path_vtk_boundary = f"{vtk_root}/boundary.vtk" + path_vtk_velocity = f"{vtk_root}/velocity.vtk" + path_vtk_potential = f"{vtk_root}/potential.vtk" + path_vtk_lbdensity = f"{vtk_root}/density.vtk" + path_vtk_lbforce = f"{vtk_root}/lbforce.vtk" + path_vtk_density = f"{vtk_root}/lbdensity.vtk" + path_vtk_flux = f"{vtk_root}/flux.vtk" + path_vtk_flux_link = f"{vtk_root}/flux_link.vtk" + if espressomd.has_features('EK_DEBUG'): + path_vtk_flux_fluc = f"{vtk_root}/flux_fluc.vtk" + ek.write_vtk_boundary(path_vtk_boundary) + ek.write_vtk_velocity(path_vtk_velocity) + ek.write_vtk_potential(path_vtk_potential) + ek.write_vtk_density(path_vtk_lbdensity) + ek.write_vtk_lbforce(path_vtk_lbforce) + counterions.write_vtk_density(path_vtk_density) + counterions.write_vtk_flux(path_vtk_flux) + if espressomd.has_features('EK_DEBUG'): + counterions.write_vtk_flux_fluc(path_vtk_flux_fluc) + counterions.write_vtk_flux_link(path_vtk_flux_link) + + # load VTK files to check they are correctly formatted + get_vtk = self.parse_vtk + vtk_boundary = get_vtk(path_vtk_boundary, "boundary", grid_dims) + vtk_velocity = get_vtk(path_vtk_velocity, "velocity", grid_dims + [3]) + vtk_potential = get_vtk(path_vtk_potential, "potential", grid_dims) + vtk_lbdensity = get_vtk(path_vtk_lbdensity, "density_lb", grid_dims) + get_vtk(path_vtk_lbforce, "lbforce", grid_dims + [3]) + vtk_density = get_vtk(path_vtk_density, "density_1", grid_dims) + vtk_flux = get_vtk(path_vtk_flux, "flux_1", grid_dims + [3]) + if espressomd.has_features('EK_DEBUG'): + get_vtk(path_vtk_flux_fluc, "flux_fluc_1", grid_dims + 
[4]) + get_vtk(path_vtk_flux_link, "flux_link_1", grid_dims + [13]) + + # check VTK files against the EK grid + species_density = np.zeros(grid_dims) + species_flux = np.zeros(grid_dims + [3]) + ek_potential = np.zeros(grid_dims) + ek_velocity = np.zeros(grid_dims + [3]) + for i in range(grid_dims[0]): + for j in range(grid_dims[1]): + for k in range(grid_dims[2]): + index = np.array([i, j, k]) + species_density[i, j, k] = counterions[index].density + species_flux[i, j, k] = counterions[index].flux + ek_potential[i, j, k] = ek[index].potential + ek_velocity[i, j, k] = ek[index].velocity + + np.testing.assert_allclose(vtk_velocity, ek_velocity, atol=1e-6) + np.testing.assert_allclose(vtk_potential, ek_potential, atol=1e-6) + np.testing.assert_allclose(vtk_density, species_density, atol=1e-6) + np.testing.assert_allclose(vtk_flux, species_flux, atol=1e-6) + + # check VTK files against the EK parameters + dens = params_base['density_water'] + left_dist = int(params_base['padding'] / params_base['agrid']) + right_dist = int(-params_base['padding'] / params_base['agrid']) + thickness = int(params_base['thickness'] / params_base['agrid']) + i = np.roll([0, 0, right_dist], params['n_roll_index']) + j = np.roll([thickness, thickness, left_dist], params['n_roll_index']) + mask_left = np.zeros(grid_dims, dtype=bool) + mask_left[:j[0], :j[1], :j[2]] = True + mask_right = np.zeros(grid_dims, dtype=bool) + mask_right[i[0]:, i[1]:, i[2]:] = True + mask_outside = np.logical_or(mask_left, mask_right) + mask_inside = np.logical_not(mask_outside) + np.testing.assert_allclose(vtk_lbdensity[mask_inside], dens, atol=1e-4) + np.testing.assert_allclose(vtk_lbdensity[mask_outside], 0, atol=1e-6) + np.testing.assert_allclose(vtk_boundary[mask_left], 1, atol=1e-6) + np.testing.assert_allclose(vtk_boundary[mask_left], 1, atol=1e-6) + np.testing.assert_allclose(vtk_boundary[mask_right], 2, atol=1e-6) + np.testing.assert_allclose(vtk_boundary[mask_inside], 0, atol=1e-6) + + +if __name__ == 
"__main__": + ut.main() diff --git a/testsuite/python/ek_eof_one_species_x.py b/testsuite/python/ek_eof_one_species_x.py deleted file mode 100644 index 4d06ecaeab4..00000000000 --- a/testsuite/python/ek_eof_one_species_x.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2011-2019 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import unittest as ut - -from ek_eof_one_species_base import ek_eof_one_species -from ek_eof_one_species_base import params_base - -params_x = dict([ - ('box_x', 3.0), - ('box_y', 3.0), - ('box_z', params_base['width'] + 2 * params_base['padding']), - ('ext_force_density', [params_base['force'], 0.0, 0.0]), - ('wall_normal_1', [0, 0, 1]), - ('wall_normal_2', [0, 0, -1]), - ('periodic_dirs', (0, 1)), - ('non_periodic_dir', 2), - ('n_roll_index', 0), - ('calculated_pressure_xy', 0.0), - ('calculated_pressure_yz', 0.0) -]) - - -class eof_x(ek_eof_one_species): - - def test(self): - self.run_test(params_x) - - -if __name__ == "__main__": - ut.main() diff --git a/testsuite/python/ek_eof_one_species_y.py b/testsuite/python/ek_eof_one_species_y.py deleted file mode 100644 index 7e72950a1f3..00000000000 --- a/testsuite/python/ek_eof_one_species_y.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2011-2019 The ESPResSo project -# -# This file is part of ESPResSo. 
-# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import unittest as ut - -from ek_eof_one_species_base import ek_eof_one_species -from ek_eof_one_species_base import params_base - -params_y = dict([ - ('box_x', params_base['width'] + 2 * params_base['padding']), - ('box_y', 3.0), - ('box_z', 3.0), - ('ext_force_density', [0.0, params_base['force'], 0.0]), - ('wall_normal_1', [1, 0, 0]), - ('wall_normal_2', [-1, 0, 0]), - ('periodic_dirs', (1, 2)), - ('non_periodic_dir', 0), - ('n_roll_index', 1), - ('calculated_pressure_xz', 0.0), - ('calculated_pressure_yz', 0.0) -]) - - -class eof_y(ek_eof_one_species): - - def test(self): - self.run_test(params_y) - - -if __name__ == "__main__": - ut.main() diff --git a/testsuite/python/ek_eof_one_species_z.py b/testsuite/python/ek_eof_one_species_z.py deleted file mode 100644 index 2bade76def7..00000000000 --- a/testsuite/python/ek_eof_one_species_z.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2011-2019 The ESPResSo project -# -# This file is part of ESPResSo. -# -# ESPResSo is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# ESPResSo is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import unittest as ut - -from ek_eof_one_species_base import ek_eof_one_species -from ek_eof_one_species_base import params_base - -params_z = dict([ - ('box_x', 3.0), - ('box_y', params_base['width'] + 2 * params_base['padding']), - ('box_z', 3.0), - ('ext_force_density', [0.0, 0.0, params_base['force']]), - ('wall_normal_1', [0, 1, 0]), - ('wall_normal_2', [0, -1, 0]), - ('periodic_dirs', (0, 2)), - ('non_periodic_dir', 1), - ('n_roll_index', 2), - ('calculated_pressure_xy', 0.0), - ('calculated_pressure_xz', 0.0) -]) - - -class eof_z(ek_eof_one_species): - - def test(self): - self.run_test(params_z) - - -if __name__ == "__main__": - ut.main() diff --git a/testsuite/python/elc.py b/testsuite/python/elc.py index c1d9a9085ab..32fe7754935 100644 --- a/testsuite/python/elc.py +++ b/testsuite/python/elc.py @@ -16,8 +16,7 @@ # along with this program. If not, see . 
import unittest as ut import unittest_decorators as utx -import espressomd -from espressomd import electrostatics, electrostatic_extensions +import espressomd.electrostatics import numpy as np @@ -33,32 +32,31 @@ class ElcTest(ut.TestCase): system.cell_system.skin = 0.0 def test_finite_potential_drop(self): - s = self.system + system = self.system - p1 = s.part.add(pos=[0, 0, 1], q=+1) - p2 = s.part.add(pos=[0, 0, 9], q=-1) + p1 = system.part.add(pos=[0, 0, 1], q=+1) + p2 = system.part.add(pos=[0, 0, 9], q=-1) - s.actors.add( - electrostatics.P3M( - # zero is not allowed - prefactor=1e-100, - mesh=32, - cao=5, - accuracy=1e-3, - )) - - s.actors.add( - electrostatic_extensions.ELC( - gap_size=GAP[2], - maxPWerror=1e-3, - delta_mid_top=-1, - delta_mid_bot=-1, - const_pot=1, - pot_diff=POTENTIAL_DIFFERENCE, - )) + p3m = espressomd.electrostatics.P3M( + # zero is not allowed + prefactor=1e-100, + mesh=32, + cao=5, + accuracy=1e-3, + ) + elc = espressomd.electrostatics.ELC( + p3m_actor=p3m, + gap_size=GAP[2], + maxPWerror=1e-3, + delta_mid_top=-1, + delta_mid_bot=-1, + const_pot=1, + pot_diff=POTENTIAL_DIFFERENCE, + ) + system.actors.add(elc) # Calculated energy - U_elc = s.analysis.energy()['coulomb'] + U_elc = system.analysis.energy()['coulomb'] # Expected E-Field is voltage drop over the box E_expected = POTENTIAL_DIFFERENCE / (BOX_L[2] - GAP[2]) @@ -67,7 +65,7 @@ def test_finite_potential_drop(self): self.assertAlmostEqual(U_elc, U_expected) - s.integrator.run(0) + system.integrator.run(0) self.assertAlmostEqual(E_expected, p1.f[2] / p1.q) self.assertAlmostEqual(E_expected, p2.f[2] / p2.q) @@ -76,14 +74,14 @@ def test_finite_potential_drop(self): p1.pos = [BOX_L[0] / 2, BOX_L[1] / 2, BOX_L[2] - GAP[2] / 2] with self.assertRaises(Exception): self.system.analysis.energy() - with self.assertRaises(Exception): - self.integrator.run(2) + with self.assertRaisesRegex(Exception, 'entered ELC gap region'): + self.system.integrator.run(2) # negative direction p1.pos = 
[BOX_L[0] / 2, BOX_L[1] / 2, -GAP[2] / 2] with self.assertRaises(Exception): self.system.analysis.energy() - with self.assertRaises(Exception): - self.integrator.run(2) + with self.assertRaisesRegex(Exception, 'entered ELC gap region'): + self.system.integrator.run(2) if __name__ == "__main__": diff --git a/testsuite/python/elc_vs_analytic.py b/testsuite/python/elc_vs_analytic.py index 8f186608061..d5bfbb28764 100644 --- a/testsuite/python/elc_vs_analytic.py +++ b/testsuite/python/elc_vs_analytic.py @@ -19,7 +19,6 @@ import espressomd import numpy as np import espressomd.electrostatics -from espressomd import electrostatic_extensions @utx.skipIfMissingFeatures(["P3M"]) @@ -62,12 +61,11 @@ def test_elc(self): accuracy=self.accuracy, mesh=[58, 58, 70], cao=4) - self.system.actors.add(p3m) - - elc = electrostatic_extensions.ELC(gap_size=self.elc_gap, - maxPWerror=self.accuracy, - delta_mid_bot=self.delta_mid_bot, - delta_mid_top=self.delta_mid_top) + elc = espressomd.electrostatics.ELC(p3m_actor=p3m, + gap_size=self.elc_gap, + maxPWerror=self.accuracy, + delta_mid_bot=self.delta_mid_bot, + delta_mid_top=self.delta_mid_top) self.system.actors.add(elc) elc_results = self.scan() diff --git a/testsuite/python/electrostaticInteractions.py b/testsuite/python/electrostaticInteractions.py index ee676971d4d..a59b0da1992 100644 --- a/testsuite/python/electrostaticInteractions.py +++ b/testsuite/python/electrostaticInteractions.py @@ -27,14 +27,13 @@ @utx.skipIfMissingFeatures(["ELECTROSTATICS"]) class ElectrostaticInteractionsTests(ut.TestCase): # Handle to espresso system - system = espressomd.System(box_l=[1.0, 1.0, 1.0]) + system = espressomd.System(box_l=[20., 20., 20.]) def setUp(self): - self.system.box_l = [20, 20, 20] self.system.time_step = 0.01 - self.system.part.add(id=0, pos=(1.0, 2.0, 2.0), q=1) - self.system.part.add(id=1, pos=(3.0, 2.0, 2.0), q=-1) + self.system.part.add(id=0, pos=(9.0, 2.0, 2.0), q=1) + self.system.part.add(id=1, pos=(11.0, 2.0, 2.0), q=-1) def 
tearDown(self): self.system.part.clear() @@ -78,51 +77,53 @@ def calc_rf_potential(self, r, rf_params): @utx.skipIfMissingFeatures(["P3M"]) def test_p3m(self): - prefactor = 1.1 - self.system.part[0].pos = [1.0, 2.0, 2.0] - self.system.part[1].pos = [3.0, 2.0, 2.0] - # results, reference values for energy and force only calculated for - # prefactor = 1 - p3m_energy = -0.501062398379 * prefactor - p3m_force = 2.48921612e-01 * prefactor - p3m = espressomd.electrostatics.P3M(prefactor=prefactor, - accuracy=9.910945054074526e-08, - mesh=[22, 22, 22], - cao=7, - r_cut=8.906249999999998, - alpha=0.387611049779351, - tune=False) - self.system.actors.add(p3m) - self.assertAlmostEqual(self.system.analysis.energy()['coulomb'], - p3m_energy, places=5) - # need to update forces - self.system.integrator.run(0) - np.testing.assert_allclose(np.copy(self.system.part[0].f), - [p3m_force, 0, 0], atol=1E-4) - np.testing.assert_allclose(np.copy(self.system.part[1].f), - [-p3m_force, 0, 0], atol=1E-5) - - @utx.skipIfMissingFeatures(["P3M"]) - def test_p3m_non_metallic(self): prefactor = 1.1 box_vol = self.system.volume() - self.system.part[0].pos = [1.0, 2.0, 2.0] - self.system.part[1].pos = [3.0, 2.0, 2.0] - for epsilon_power in range(-4, 5): - epsilon = 10**epsilon_power - p3m_energy = np.pi / box_vol * 16 / (1 + 2 * epsilon) - 0.501 - p3m_energy *= prefactor - p3m = espressomd.electrostatics.P3M(prefactor=prefactor, - accuracy=9.910945054074526e-08, - mesh=[22, 22, 22], - cao=7, - epsilon=epsilon, - r_cut=8.906249999999998, - alpha=0.387611049779351, - tune=False) + p1, p2 = self.system.part[:] + dip = np.copy(p1.q * p1.pos + p2.q * p2.pos) + p3m_params = {'accuracy': 1e-7, + 'mesh': [22, 22, 22], + 'cao': 7, + 'r_cut': 8.906249999999998, + 'alpha': 0.387611049779351} + + # reference values for energy and force calculated for prefactor = 1 + ref_energy = -0.501062398379 * prefactor + ref_force1 = [0.248921612 * prefactor, 0, 0] + ref_force2 = [-ref_force1[0], 0, 0] + + # check 
metallic case + p3m = espressomd.electrostatics.P3M( + prefactor=prefactor, epsilon='metallic', tune=False, **p3m_params) + self.system.actors.add(p3m) + self.system.integrator.run(0, recalc_forces=True) + p3m_energy = self.system.analysis.energy()['coulomb'] + tol = 1e-5 + np.testing.assert_allclose(p3m_energy, ref_energy, atol=tol) + np.testing.assert_allclose(np.copy(p1.f), ref_force1, atol=tol) + np.testing.assert_allclose(np.copy(p2.f), ref_force2, atol=tol) + + # keep current values as reference to check for P3M dipole correction + ref_energy_metallic = self.system.analysis.energy()['coulomb'] + ref_forces_metallic = np.copy(self.system.part[:].f) + self.system.actors.remove(p3m) + + # check non-metallic case + tol = 1e-10 + for epsilon in np.power(10., np.arange(-4, 5)): + dipole_correction = 4 * np.pi / box_vol / (1 + 2 * epsilon) + energy_correction = dipole_correction * np.linalg.norm(dip)**2 + forces_correction = np.outer([p1.q, p2.q], dipole_correction * dip) + ref_energy = ref_energy_metallic + prefactor * energy_correction + ref_forces = ref_forces_metallic - prefactor * forces_correction + p3m = espressomd.electrostatics.P3M( + prefactor=prefactor, epsilon=epsilon, tune=False, **p3m_params) self.system.actors.add(p3m) - self.assertAlmostEqual(self.system.analysis.energy()['coulomb'], - p3m_energy, places=3) + self.system.integrator.run(0, recalc_forces=True) + p3m_forces = np.array([p1.f, p2.f]) + p3m_energy = self.system.analysis.energy()['coulomb'] + np.testing.assert_allclose(p3m_energy, ref_energy, atol=tol) + np.testing.assert_allclose(p3m_forces, ref_forces, atol=tol) self.system.actors.remove(p3m) def test_dh(self): diff --git a/testsuite/python/engine_langevin.py b/testsuite/python/engine_langevin.py index 5b3f9cf9eb5..a6969bb1b8a 100644 --- a/testsuite/python/engine_langevin.py +++ b/testsuite/python/engine_langevin.py @@ -52,7 +52,7 @@ def z_v(t, z0): S.part.add(id=0, pos=pos_0, swimming={"v_swim": v_swim}) S.part.add(id=1, pos=pos_1, 
swimming={"f_swim": f_swim}) - S.part[:].rotation = 1, 1, 1 + S.part[:].rotation = (1, 1, 1) S.thermostat.set_langevin(kT=temp, gamma=gamma, seed=42) diff --git a/testsuite/python/field_test.py b/testsuite/python/field_test.py index 3cf32e30658..18c56e33868 100644 --- a/testsuite/python/field_test.py +++ b/testsuite/python/field_test.py @@ -89,6 +89,14 @@ def test_linear_electric_potential(self): self.assertAlmostEqual(self.system.analysis.energy()['total'], self.system.analysis.energy()['external_fields']) + np.testing.assert_allclose( + electric_field.call_method("_eval_field", x=[0, 0, 0]), phi0) + np.testing.assert_allclose( + electric_field.call_method("_eval_field", x=[3, 2, 1]), + np.dot(-E, [3, 2, 1]) + phi0) + np.testing.assert_allclose( + electric_field.call_method("_eval_jacobian", x=[3, 2, 1]), -E) + @utx.skipIfMissingFeatures("ELECTROSTATICS") def test_electric_plane_wave(self): E0 = np.array([1., -2., 3.]) @@ -146,10 +154,18 @@ def test_potential_field(self): box, h, self.potential) F = constraints.PotentialField(field=field_data, grid_spacing=h, + particle_scales={1: 0.0}, default_scale=scaling) p = self.system.part.add(pos=[0, 0, 0]) + self.system.part.add(pos=[1, 0, 0]) self.system.constraints.add(F) + self.assertAlmostEqual(F.default_scale, scaling, delta=1e-9) + self.assertEqual(F.particle_scales, {1: 0.0}) + with self.assertRaisesRegex(RuntimeError, 'Parameter default_scale is read-only'): + F.default_scale = 2.0 + with self.assertRaisesRegex(RuntimeError, 'Parameter particle_scales is read-only'): + F.particle_scales = {0: 0.0} for i in product(*map(range, 3 * [10])): x = (h * i) @@ -198,10 +214,17 @@ def test_force_field(self): field_data = constraints.ForceField.field_from_fn(box, h, self.force) F = constraints.ForceField(field=field_data, grid_spacing=h, + particle_scales={1: 0.0}, default_scale=scaling) p = self.system.part.add(pos=[0, 0, 0]) self.system.constraints.add(F) + self.assertAlmostEqual(F.default_scale, scaling, delta=1e-9) + 
self.assertEqual(F.particle_scales, {1: 0.0}) + with self.assertRaisesRegex(RuntimeError, 'Parameter default_scale is read-only'): + F.default_scale = 2.0 + with self.assertRaisesRegex(RuntimeError, 'Parameter particle_scales is read-only'): + F.particle_scales = {0: 0.0} for i in product(*map(range, 3 * [10])): x = (h * i) @@ -226,6 +249,8 @@ def test_flow_field(self): p = self.system.part.add(pos=[0, 0, 0], v=[1, 2, 3]) self.system.constraints.add(F) + with self.assertRaisesRegex(RuntimeError, 'Parameter gamma is read-only'): + F.gamma = 2.0 for i in product(*map(range, 3 * [10])): x = (h * i) diff --git a/testsuite/python/gpu_availability.py b/testsuite/python/gpu_availability.py index 03d30cb44d1..e8638b768d8 100644 --- a/testsuite/python/gpu_availability.py +++ b/testsuite/python/gpu_availability.py @@ -17,21 +17,62 @@ # along with this program. If not, see . # import unittest as ut +import unittest_decorators as utx import espressomd class GPUAvailability(ut.TestCase): """Tests consistency of GPU availability reporting.""" + system = espressomd.System(box_l=[1, 1, 1]) def test(self): if espressomd.has_features("CUDA"): - system = espressomd.System(box_l=[1, 1, 1]) - self.assertEqual(system.cuda_init_handle.device_list != {}, + self.assertEqual(self.system.cuda_init_handle.list_devices() != {}, espressomd.gpu_available()) + self.assertEqual( + self.system.cuda_init_handle.list_devices_properties() != {}, + espressomd.gpu_available()) else: self.assertFalse(espressomd.gpu_available()) + @utx.skipIfMissingFeatures("CUDA") + def test_exceptions(self): + error_msg = 'CUDA error: ' + if espressomd.gpu_available(): + n_gpus = len(self.system.cuda_init_handle.list_devices()) + with self.assertRaisesRegex(RuntimeError, error_msg): + self.system.cuda_init_handle.device = n_gpus + 1 + else: + with self.assertRaisesRegex(RuntimeError, error_msg): + self.system.cuda_init_handle.device + with self.assertRaisesRegex(RuntimeError, error_msg): + 
self.system.cuda_init_handle.device = 0 + + @utx.skipIfMissingGPU() + def test_list_devices(self): + # check if GPU properties can be queried + device_list = self.system.cuda_init_handle.list_devices() + device_list_p = self.system.cuda_init_handle.list_devices_properties() + self.assertEqual(len(device_list_p), 1) + device_list_p_head = list(device_list_p.values())[0] + dev_keys = {'name', 'compute_capability', 'cores', 'total_memory'} + # check both dicts agree + self.assertEqual(device_list.keys(), device_list_p_head.keys()) + for dev_id in device_list: + self.assertEqual(device_list_p_head[dev_id].keys(), dev_keys) + self.assertEqual( + device_list_p_head[dev_id]['name'], + device_list[dev_id]) + # check the currently active GPU + dev_id = self.system.cuda_init_handle.device + self.assertIn(dev_id, device_list_p_head) + device = device_list_p_head[dev_id] + self.assertGreater(device['cores'], 0) + self.assertGreater(device['total_memory'], 0) + self.assertGreaterEqual(device['compute_capability'][0], 3) + self.assertGreaterEqual(device['compute_capability'][1], 0) + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/icc.py b/testsuite/python/icc.py index 7e98d658cfa..f97d5d88cfa 100644 --- a/testsuite/python/icc.py +++ b/testsuite/python/icc.py @@ -17,98 +17,162 @@ import unittest as ut import unittest_decorators as utx import espressomd +import numpy as np -@utx.skipIfMissingFeatures(["P3M", "EXTERNAL_FORCES"]) +@utx.skipIfMissingFeatures(["ELECTROSTATICS", "EXTERNAL_FORCES"]) class test_icc(ut.TestCase): + system = espressomd.System(box_l=[10, 10, 10]) + + def tearDown(self): + self.system.actors.clear() + self.system.part.clear() + + def add_icc_particles(self, side_num_particles, + initial_charge, z_position): + number = side_num_particles**2 + areas = self.system.box_l[0] * \ + self.system.box_l[1] / number * np.ones(number) + normals = np.zeros((number, 3)) + normals[:, 2] = 1 + + x_position = np.linspace( + 0, + self.system.box_l[0], + 
side_num_particles, + endpoint=False) + y_position = np.linspace( + 0, + self.system.box_l[1], + side_num_particles, + endpoint=False) + x_pos, y_pos = np.meshgrid(x_position, y_position) + + positions = np.stack((x_pos, y_pos, np.full_like( + x_pos, z_position)), axis=-1).reshape(-1, 3) + + charges = np.full(number, initial_charge) + fix = [(True, True, True)] * number + + return self.system.part.add( + pos=positions, q=charges, fix=fix), normals, areas + + def common_setup(self, kwargs, error): + from espressomd.electrostatic_extensions import ICC + + self.tearDown() + part_slice, normals, areas = self.add_icc_particles(2, 0.01, 0) + + params = {"n_icc": len(part_slice), + "normals": normals, + "areas": areas, + "epsilons": np.ones_like(areas), + "first_id": part_slice.id[0], + "check_neutrality": False} + + params.update(kwargs) + + icc = ICC(**params) + with self.assertRaisesRegex(Exception, error): + self.system.actors.add(icc) + + def test_params(self): + params = [({"n_icc": -1}, 'ICC: invalid number of particles'), + ({"first_id": -1}, 'ICC: invalid first_id'), + ({"max_iterations": -1}, 'ICC: invalid max_iterations'), + ({"convergence": -1}, 'ICC: invalid convergence value'), + ({"relaxation": -1}, 'ICC: invalid relaxation value'), + ({"relaxation": 2.1}, 'ICC: invalid relaxation value'), + ({"eps_out": -1}, 'ICC: invalid eps_out'), + ({"ext_field": 0}, 'A single value was given but 3 were expected'), ] + + for kwargs, error in params: + self.common_setup(kwargs, error) + + def test_core_params(self): + from espressomd.electrostatic_extensions import ICC + + self.tearDown() + part_slice, normals, areas = self.add_icc_particles(5, 0.01, 0) - def runTest(self): + params = {"n_icc": len(part_slice), + "normals": normals, + "areas": areas, + "epsilons": np.ones_like(areas), + "first_id": part_slice.id[0], + "check_neutrality": False} + + icc = ICC(**params) + self.system.actors.add(icc) + + icc_params = icc.get_params() + for key, value in params.items(): + 
np.testing.assert_allclose(value, np.copy(icc_params[key])) + + @utx.skipIfMissingFeatures(["P3M"]) + def test_dipole_system(self): from espressomd.electrostatics import P3M from espressomd.electrostatic_extensions import ICC - S = espressomd.System(box_l=[1.0, 1.0, 1.0]) - # Parameters - box_l = 20.0 - nicc = 10 - q_test = 10.0 - q_dist = 5.0 - - # System - S.box_l = [box_l, box_l, box_l + 5.0] - S.cell_system.skin = 0.4 - S.time_step = 0.01 - - # ICC particles - nicc_per_electrode = nicc * nicc - nicc_tot = 2 * nicc_per_electrode - iccArea = box_l * box_l / nicc_per_electrode - - iccNormals = [] - iccAreas = [] - iccSigmas = [] - iccEpsilons = [] - - l = box_l / nicc - for xi in range(nicc): - for yi in range(nicc): - S.part.add(pos=[l * xi, l * yi, 0], q=-0.0001, fix=[1, 1, 1]) - iccNormals.append([0, 0, 1]) - - for xi in range(nicc): - for yi in range(nicc): - S.part.add(pos=[l * xi, l * yi, box_l], - q=0.0001, fix=[1, 1, 1]) - iccNormals.append([0, 0, -1]) - - iccAreas.extend([iccArea] * nicc_tot) - iccSigmas.extend([0] * nicc_tot) - iccEpsilons.extend([10000000] * nicc_tot) - - # Test Dipole - b2 = box_l * 0.5 - S.part.add(pos=[b2, b2, b2 - q_dist / 2], q=q_test, fix=[1, 1, 1]) - S.part.add(pos=[b2, b2, b2 + q_dist / 2], q=-q_test, fix=[1, 1, 1]) - - # Actors + BOX_L = 20. + BOX_SPACE = 5. + + self.tearDown() + self.system.box_l = [BOX_L, BOX_L, BOX_L + BOX_SPACE] + self.system.cell_system.skin = 0.4 + self.system.time_step = 0.01 + + N_ICC_SIDE_LENGTH = 10 + DIPOLE_DISTANCE = 5.0 + DIPOLE_CHARGE = 10.0 + + part_slice_lower, normals_lower, areas_lower = self.add_icc_particles( + N_ICC_SIDE_LENGTH, -0.0001, 0.) 
+ part_slice_upper, normals_upper, areas_upper = self.add_icc_particles( + N_ICC_SIDE_LENGTH, 0.0001, BOX_L) + + assert (part_slice_upper.id[-1] - part_slice_lower.id[0] + + 1) == 2 * N_ICC_SIDE_LENGTH**2, "ICC particles not continuous" + + normals = np.vstack((normals_lower, -normals_upper)) + areas = np.hstack((areas_lower, areas_upper)) + epsilons = np.full_like(areas, 1e8) + sigmas = np.zeros_like(areas) + + icc = ICC(n_icc=2 * N_ICC_SIDE_LENGTH**2, + normals=normals, + areas=areas, + epsilons=epsilons, + sigmas=sigmas, + convergence=1e-6, + max_iterations=100, + first_id=part_slice_lower.id[0], + eps_out=1., + relaxation=0.75, + ext_field=[0, 0, 0]) + + # Dipole in the center of the simulation box + BOX_L_HALF = BOX_L / 2 + + self.system.part.add(pos=[BOX_L_HALF, BOX_L_HALF, BOX_L_HALF - DIPOLE_DISTANCE / 2], + q=DIPOLE_CHARGE, fix=[True, True, True]) + self.system.part.add(pos=[BOX_L_HALF, BOX_L_HALF, BOX_L_HALF + DIPOLE_DISTANCE / 2], + q=-DIPOLE_CHARGE, fix=[True, True, True]) + p3m = P3M(prefactor=1, mesh=32, cao=7, accuracy=1e-5) - icc = ICC( - n_icc=nicc_tot, - convergence=1e-6, - relaxation=0.75, - ext_field=[0, 0, 0], - max_iterations=100, - first_id=0, - eps_out=1, - normals=iccNormals, - areas=iccAreas, - sigmas=iccSigmas, - epsilons=iccEpsilons) - - S.actors.add(p3m) - S.actors.add(icc) - - # Run - S.integrator.run(0) - - # Analyze - QL = sum(S.part[:nicc_per_electrode].q) - QR = sum(S.part[nicc_per_electrode:nicc_tot].q) - - testcharge_dipole = q_test * q_dist - induced_dipole = 0.5 * (abs(QL) + abs(QR)) * box_l - - # Result - self.assertAlmostEqual(1, induced_dipole / testcharge_dipole, places=4) - # Test applying changes - enegry_pre_change = S.analysis.energy()['total'] - pressure_pre_change = S.analysis.pressure()['total'] - icc.set_params(sigmas=[2.0] * nicc_tot) - icc.set_params(epsilons=[20.0] * nicc_tot) - enegry_post_change = S.analysis.energy()['total'] - pressure_post_change = S.analysis.pressure()['total'] - 
self.assertNotAlmostEqual(enegry_pre_change, enegry_post_change) - self.assertNotAlmostEqual(pressure_pre_change, pressure_post_change) + self.system.actors.add(p3m) + self.system.actors.add(icc) + + self.system.integrator.run(0) + + charge_lower = sum(part_slice_lower.q) + charge_upper = sum(part_slice_upper.q) + + testcharge_dipole = DIPOLE_CHARGE * DIPOLE_DISTANCE + induced_dipole = 0.5 * (abs(charge_lower) + abs(charge_upper)) * BOX_L + + self.assertAlmostEqual(1, induced_dipole / testcharge_dipole, places=4) if __name__ == "__main__": diff --git a/testsuite/python/lb_electrohydrodynamics.py b/testsuite/python/lb_electrohydrodynamics.py index a716e5a8d5c..126f8f351dc 100644 --- a/testsuite/python/lb_electrohydrodynamics.py +++ b/testsuite/python/lb_electrohydrodynamics.py @@ -40,9 +40,6 @@ def setUp(self): self.s.time_step = self.params['time_step'] self.s.cell_system.skin = self.params['skin'] - for i in self.s.actors: - self.s.actors.remove(i) - self.lbf = self.lb.LBFluid( visc=self.params['viscosity'], dens=self.params['dens'], @@ -56,6 +53,9 @@ def setUp(self): LB_fluid=self.lbf, gamma=self.params['friction']) + def tearDown(self): + self.s.actors.clear() + def test(self): s = self.s diff --git a/testsuite/python/lb_poiseuille_cylinder.py b/testsuite/python/lb_poiseuille_cylinder.py index 981c0c23e28..0607a60641f 100644 --- a/testsuite/python/lb_poiseuille_cylinder.py +++ b/testsuite/python/lb_poiseuille_cylinder.py @@ -18,6 +18,7 @@ import unittest_decorators as utx import numpy as np +import espressomd.math import espressomd.lb import espressomd.lbboundaries import espressomd.observables @@ -81,7 +82,8 @@ class LBPoiseuilleCommon: system = espressomd.System(box_l=[BOX_L] * 3) system.time_step = TIME_STEP system.cell_system.skin = 0.4 * AGRID - params = {'axis': [0, 0, 1]} + params = {'axis': [0, 0, 1], + 'orientation': [1, 0, 0]} def prepare(self): """ @@ -150,8 +152,10 @@ def prepare_obs(self): else: obs_center = [BOX_L / 2.0, BOX_L / 2.0, 0.0] 
local_obs_params = OBS_PARAMS.copy() - local_obs_params['center'] = obs_center - local_obs_params['axis'] = self.params['axis'] + ctp = espressomd.math.CylindricalTransformationParameters(center=obs_center, + axis=self.params['axis'], + orientation=self.params['orientation']) + local_obs_params['transform_params'] = ctp obs = espressomd.observables.CylindricalLBVelocityProfile( **local_obs_params) self.accumulator = espressomd.accumulators.MeanVarianceCalculator( @@ -178,16 +182,19 @@ def check_observable(self): def test_x(self): self.params['axis'] = [1, 0, 0] + self.params['orientation'] = [0, 0, -1] self.compare_to_analytical() self.check_observable() def test_y(self): self.params['axis'] = [0, 1, 0] + self.params['orientation'] = [1, 0, 0] self.compare_to_analytical() self.check_observable() def test_z(self): self.params['axis'] = [0, 0, 1] + self.params['orientation'] = [1, 0, 0] self.compare_to_analytical() self.check_observable() diff --git a/testsuite/python/observable_cylindrical.py b/testsuite/python/observable_cylindrical.py index 2e6e7a79ec2..84b33d810f2 100644 --- a/testsuite/python/observable_cylindrical.py +++ b/testsuite/python/observable_cylindrical.py @@ -18,6 +18,7 @@ import unittest as ut import espressomd import espressomd.observables +import espressomd.math import tests_common @@ -31,13 +32,15 @@ class TestCylindricalObservable(ut.TestCase): system.time_step = 0.01 system.cell_system.skin = 0.4 + cyl_transform_params = espressomd.math.CylindricalTransformationParameters( + center=3 * [7.5], axis=[1 / np.sqrt(2), 1 / np.sqrt(2), 0], orientation=[0, 0, 1]) + params = { - 'ids': list(range(100)), - 'center': [7.5, 7.5, 7.5], # center of the histogram - 'axis': 'y', - 'n_r_bins': 4, # number of bins in r - 'n_phi_bins': 4, # -*- in phi - 'n_z_bins': 4, # -*- in z + 'ids': None, + 'transform_params': cyl_transform_params, + 'n_r_bins': 4, + 'n_phi_bins': 3, + 'n_z_bins': 4, 'min_r': 0.0, 'min_phi': -np.pi, 'min_z': -5.0, @@ -46,180 +49,162 @@ class 
TestCylindricalObservable(ut.TestCase): 'max_z': 5.0, } + v_r = 0.6 + v_phi = 0.7 + v_z = 0.8 + def tearDown(self): self.system.part.clear() - def swap_axis(self, arr, axis): - if axis == 'x': - arr = np.dot( - tests_common.rotation_matrix([0, 1, 0], np.pi / 2.0), arr) - elif axis == 'y': - arr = np.dot( - tests_common.rotation_matrix([1, 0, 0], -np.pi / 2.0), arr) - return arr - - def swap_axis_inverse(self, arr, axis): - if axis == 'x': - arr = np.dot( - tests_common.rotation_matrix([0, 1, 0], -np.pi / 2.0), arr) - elif axis == 'y': - arr = np.dot( - tests_common.rotation_matrix([1, 0, 0], np.pi / 2.0), arr) - return arr - - def pol_coords(self): - positions = np.zeros((len(self.params['ids']), 3)) - for i, p in enumerate(self.system.part): - tmp = p.pos - np.array(self.params['center']) - tmp = self.swap_axis_inverse(tmp, self.params['axis']) - positions[ - i, :] = tests_common.transform_pos_from_cartesian_to_polar_coordinates(tmp) - return positions - - def set_particles(self): - self.system.part.clear() - # Parameters for an ellipse. - a = 1.0 # semi minor-axis length - b = 2.0 # semi major-axis length - # Choose the cartesian velocities such that each particle gets the same - # v_r, v_phi and v_z, respectively. - self.v_r = .75 - self.v_phi = 2.5 - self.v_z = 1.5 - for i in self.params['ids']: + def calc_ellipsis_pos_vel( + self, n_part, z_min, z_max, semi_x=1., semi_y=1.): + """ + Calculate positions on an elliptical corkscrew line. 
+ Calculate cartesian velocities that lead to a + constant velocity in cylindrical coordinates + """ + + zs = np.linspace(z_min, z_max, num=n_part) + angles = np.linspace(-0.99 * np.pi, 0.999 * np.pi, num=n_part) + + positions = [] + velocities = [] + + for angle, z in zip(angles, zs): position = np.array( - [a * np.cos(i * 2.0 * np.pi / (len(self.params['ids']) + 1)), - b * np.sin(i * 2.0 * np.pi / (len(self.params['ids']) + 1)), - i * (self.params['max_z'] - self.params['min_z']) / - (len(self.params['ids']) + 1) - self.params['center'][2]]) - - e_z = np.array([0, 0, 1]) - e_r = position - (position * e_z) * e_z - e_r /= np.linalg.norm(e_r) - e_phi = np.cross(e_z, e_r) - velocity = e_r * self.v_r + e_phi * self.v_phi + e_z * self.v_z - - velocity = self.swap_axis(velocity, self.params['axis']) - position = self.swap_axis(position, self.params['axis']) - position += np.array(self.params['center']) - self.system.part.add(id=i, pos=position, v=velocity) - - def calculate_numpy_histogram(self): - pol_positions = self.pol_coords() + [semi_x * np.cos(angle), + semi_y * np.sin(angle), + z]) + + e_r, e_phi, e_z = tests_common.get_cylindrical_basis_vectors( + position) + velocity = self.v_r * e_r + self.v_phi * e_phi + self.v_z * e_z + + positions.append(position) + velocities.append(velocity) + + return np.array(positions), np.array(velocities) + + def align_with_observable_frame(self, vec): + """ + Rotate vectors from the original box frame to the frame of the observables. + """ + + # align original z to observable z + vec = tests_common.rodrigues_rot(vec, [1, -1, 0], -np.pi / 2.) + # original x now points along [sqrt(3),-sqrt(3),-sqrt(3)] + + # align original x to observable orientation + vec = tests_common.rodrigues_rot(vec, [1, 1, 0], -3. / 4. * np.pi) + return vec + + def setup_system_get_np_hist(self): + """ + Pick positions and velocities in the original box frame + and calculate the np histogram. 
+ Then rotate and move the positions and velocities + to the frame of the observables. + After calculating the core observables, the result should be + the same as the np histogram obtained from the original box frame. + """ + + positions, velocities = self.calc_ellipsis_pos_vel(100, 0.99 * + self.params['min_z'], 0.9 * + self.params['max_z'], semi_x=0.9 * + self.params['max_r'], semi_y=0.2 * + self.params['max_r']) + + # first, get the numpy histogram of the cylinder coordinates + pos_cyl = [] + for pos in positions: + pos_cyl.append( + tests_common.transform_pos_from_cartesian_to_polar_coordinates(pos)) np_hist, np_edges = tests_common.get_histogram( - pol_positions, self.params, 'cylindrical') - return np_hist, np_edges - - def normalize_with_bin_volume(self, histogram): - bin_volume = tests_common.get_cylindrical_bin_volume( - self.params['n_r_bins'], - self.params['n_phi_bins'], - self.params['n_z_bins'], - self.params['min_r'], - self.params['max_r'], - self.params['min_phi'], - self.params['max_phi'], - self.params['min_z'], - self.params['max_z']) - for i in range(self.params['n_r_bins']): - histogram[i, :, :] /= bin_volume[i] - return histogram - - def density_profile_test(self): - self.set_particles() - # Set up the Observable. 
- local_params = self.params.copy() - if self.params['axis'] == 'x': - local_params['axis'] = [1.0, 0.0, 0.0] - elif self.params['axis'] == 'y': - local_params['axis'] = [0.0, 1.0, 0.0] - else: - local_params['axis'] = [0.0, 0.0, 1.0] - obs = espressomd.observables.CylindricalDensityProfile(**local_params) - core_hist = obs.calculate() - core_edges = obs.call_method("edges") - np_hist, np_edges = self.calculate_numpy_histogram() - np_hist = self.normalize_with_bin_volume(np_hist) - np.testing.assert_array_almost_equal(np_hist, core_hist) - for i in range(3): - np.testing.assert_array_almost_equal(np_edges[i], core_edges[i]) - self.assertEqual(np.prod(obs.shape()), len(np_hist.flatten())) - - def velocity_profile_test(self): - self.set_particles() - # Set up the Observable. - local_params = self.params.copy() - if self.params['axis'] == 'x': - local_params['axis'] = [1.0, 0.0, 0.0] - elif self.params['axis'] == 'y': - local_params['axis'] = [0.0, 1.0, 0.0] - else: - local_params['axis'] = [0.0, 0.0, 1.0] - obs = espressomd.observables.CylindricalVelocityProfile(**local_params) - core_hist = obs.calculate() + np.array(pos_cyl), self.params, 'cylindrical') + np_dens = tests_common.normalize_cylindrical_hist( + np_hist.copy(), self.params) + + # now align the positions and velocities with the frame of reference + # used in the observables + pos_aligned = [] + vel_aligned = [] + for pos, vel in zip(positions, velocities): + pos_aligned.append( + self.align_with_observable_frame(pos) + + self.cyl_transform_params.center) + vel_aligned.append(self.align_with_observable_frame(vel)) + self.system.part.add(pos=pos_aligned, v=vel_aligned) + self.params['ids'] = self.system.part[:].id + + return np_dens, np_edges + + def check_edges(self, observable, np_edges): + core_edges = observable.call_method("edges") + for core_edge, np_edge in zip(core_edges, np_edges): + np.testing.assert_array_almost_equal(core_edge, np_edge) + + def test_density_profile(self): + """ + Check that the 
result from the observable (in its own frame) + matches the np result from the box frame + """ + np_dens, np_edges = self.setup_system_get_np_hist() + + cyl_dens_prof = espressomd.observables.CylindricalDensityProfile( + **self.params) + core_hist = cyl_dens_prof.calculate() + np.testing.assert_array_almost_equal(np_dens, core_hist) + self.check_edges(cyl_dens_prof, np_edges) + + def test_vel_profile(self): + """ + Check that the result from the observable (in its own frame) + matches the np result from the box frame + """ + np_dens, np_edges = self.setup_system_get_np_hist() + cyl_vel_prof = espressomd.observables.CylindricalVelocityProfile( + **self.params) + core_hist = cyl_vel_prof.calculate() core_hist_v_r = core_hist[:, :, :, 0] core_hist_v_phi = core_hist[:, :, :, 1] core_hist_v_z = core_hist[:, :, :, 2] - np_hist, _ = self.calculate_numpy_histogram() - for x in np.nditer(np_hist, op_flags=['readwrite']): - if x[...] > 0.0: - x[...] /= x[...] - np.testing.assert_array_almost_equal(np_hist * self.v_r, core_hist_v_r) + np_hist_binary = np_dens + np_hist_binary[np.nonzero(np_hist_binary)] = 1 + np.testing.assert_array_almost_equal( + np_hist_binary * self.v_r, core_hist_v_r) np.testing.assert_array_almost_equal( - np_hist * self.v_phi, core_hist_v_phi) - np.testing.assert_array_almost_equal(np_hist * self.v_z, core_hist_v_z) - self.assertEqual(np.prod(obs.shape()), len(np_hist.flatten()) * 3) - - def flux_density_profile_test(self): - self.set_particles() - # Set up the Observable. 
- local_params = self.params.copy() - if self.params['axis'] == 'x': - local_params['axis'] = [1.0, 0.0, 0.0] - elif self.params['axis'] == 'y': - local_params['axis'] = [0.0, 1.0, 0.0] - else: - local_params['axis'] = [0.0, 0.0, 1.0] - obs = espressomd.observables.CylindricalFluxDensityProfile( - **local_params) - core_hist = obs.calculate() + np_hist_binary * self.v_phi, core_hist_v_phi) + np.testing.assert_array_almost_equal( + np_hist_binary * self.v_z, core_hist_v_z) + self.check_edges(cyl_vel_prof, np_edges) + + def test_flux_density_profile(self): + """ + Check that the result from the observable (in its own frame) + matches the np result from the box frame + """ + np_dens, np_edges = self.setup_system_get_np_hist() + cyl_flux_dens = espressomd.observables.CylindricalFluxDensityProfile( + **self.params) + core_hist = cyl_flux_dens.calculate() core_hist_v_r = core_hist[:, :, :, 0] core_hist_v_phi = core_hist[:, :, :, 1] core_hist_v_z = core_hist[:, :, :, 2] - np_hist, _ = self.calculate_numpy_histogram() - np_hist = self.normalize_with_bin_volume(np_hist) - np.testing.assert_array_almost_equal(np_hist * self.v_r, core_hist_v_r) + np.testing.assert_array_almost_equal(np_dens * self.v_r, core_hist_v_r) np.testing.assert_array_almost_equal( - np_hist * self.v_phi, core_hist_v_phi) - np.testing.assert_array_almost_equal(np_hist * self.v_z, core_hist_v_z) - self.assertEqual(np.prod(obs.shape()), len(np_hist.flatten()) * 3) - - def test_hist_x(self): - self.params['axis'] = 'x' - self.velocity_profile_test() - self.flux_density_profile_test() - self.density_profile_test() - - def test_hist_y(self): - self.params['axis'] = 'y' - self.velocity_profile_test() - self.flux_density_profile_test() - self.density_profile_test() - - def test_hist_z(self): - self.params['axis'] = 'z' - self.velocity_profile_test() - self.flux_density_profile_test() - self.density_profile_test() + np_dens * self.v_phi, core_hist_v_phi) + np.testing.assert_array_almost_equal(np_dens * 
self.v_z, core_hist_v_z) + self.check_edges(cyl_flux_dens, np_edges) def test_cylindrical_pid_profile_interface(self): - # test setters and getters + """ + Test setters and getters of the script interface + """ params = self.params.copy() params['n_r_bins'] = 4 params['n_phi_bins'] = 6 params['n_z_bins'] = 8 params['ids'] = [0, 1] - params['axis'] = [0.0, 1.0, 0.0] self.system.part.add(id=0, pos=[0, 0, 0], type=0) self.system.part.add(id=1, pos=[0, 0, 0], type=1) observable = espressomd.observables.CylindricalDensityProfile(**params) @@ -266,15 +251,14 @@ def test_cylindrical_pid_profile_interface(self): self.assertEqual(observable.max_z, 9) obs_bin_edges = observable.bin_edges() np.testing.assert_array_equal(obs_bin_edges[-1, -1, -1], [7, 8, 9]) - # check center - np.testing.assert_array_equal( - np.copy(observable.center), params['center']) - observable.center = [3, 2, 1] - np.testing.assert_array_equal(np.copy(observable.center), [3, 2, 1]) - # check axis - np.testing.assert_array_equal(np.copy(observable.axis), params['axis']) - observable.axis = [6, 5, 4] - np.testing.assert_array_equal(np.copy(observable.axis), [6, 5, 4]) + # check center, axis, orientation + ctp = espressomd.math.CylindricalTransformationParameters( + center=[1, 2, 3], axis=[0, 1, 0], orientation=[0, 0, 1]) + observable.transform_params = ctp + + for attr_name in ['center', 'axis', 'orientation']: + np.testing.assert_array_almost_equal(np.copy(ctp.__getattr__(attr_name)), + np.copy(observable.transform_params.__getattr__(attr_name))) if __name__ == "__main__": diff --git a/testsuite/python/observable_cylindricalLB.py b/testsuite/python/observable_cylindricalLB.py index e2ba5a665c6..0d1f72edfc8 100644 --- a/testsuite/python/observable_cylindricalLB.py +++ b/testsuite/python/observable_cylindricalLB.py @@ -18,210 +18,158 @@ import unittest as ut import unittest_decorators as utx import espressomd +import espressomd.math import espressomd.observables import espressomd.lb import tests_common 
-AGRID = 1.0 -VISC = 2.7 -DENS = 1.7 -TIME_STEP = 0.1 -LB_PARAMS = {'agrid': AGRID, - 'dens': DENS, - 'visc': VISC, - 'tau': TIME_STEP, - } - - class CylindricalLBObservableCommon: """ - Testcase for the CylindricalLBObservable. + Testcase for the CylindricalLBObservables. """ lbf = None - system = espressomd.System(box_l=(10, 10, 10)) + system = espressomd.System(box_l=3 * [15]) system.time_step = 0.01 system.cell_system.skin = 0.4 positions = [] + lb_params = {'agrid': 1., + 'dens': 1.2, + 'visc': 2.7, + 'tau': 0.1, + } + cyl_transform_params = espressomd.math.CylindricalTransformationParameters( + center=3 * [7], axis=[1, 0, 0], orientation=[0, 0, 1]) + params = { - 'ids': list(range(10)), - 'center': [5.0, 5.0, 5.0], # center of the histogram - 'axis': 'y', - 'n_r_bins': 10, # number of bins in r - 'n_phi_bins': 2, # -*- in phi - 'n_z_bins': 2, # -*- in z + 'ids': None, + 'transform_params': cyl_transform_params, + 'n_r_bins': 4, + 'n_phi_bins': 3, + 'n_z_bins': 5, 'min_r': 0.0, 'min_phi': -np.pi, - 'min_z': -5.0, - 'max_r': 5.0, + 'min_z': -6.0, + 'max_r': 6.0, 'max_phi': np.pi, - 'max_z': 5.0, + 'max_z': 6.0, } - def tearDown(self): - self.system.part.clear() - - def swap_axis(self, arr, axis): - if axis == 'x': - arr = np.dot(tests_common.rotation_matrix( - [0, 1, 0], np.pi / 2.0), arr) - elif axis == 'y': - arr = np.dot(tests_common.rotation_matrix( - [1, 0, 0], -np.pi / 2.0), arr) - return arr - - def swap_axis_inverse(self, arr, axis): - if axis == 'x': - arr = np.dot(tests_common.rotation_matrix( - [0, 1, 0], -np.pi / 2.0), arr) - elif axis == 'y': - arr = np.dot(tests_common.rotation_matrix( - [1, 0, 0], np.pi / 2.0), arr) - return arr - - def pol_coords(self): - positions = np.zeros((len(self.positions), 3)) - for i, p in enumerate(self.positions): - tmp = p - np.array(self.params['center']) - tmp = self.swap_axis_inverse(tmp, self.params['axis']) - positions[i, :] = tests_common.transform_pos_from_cartesian_to_polar_coordinates( - tmp) - return 
positions - - def set_particles(self): - self.system.part.clear() - self.system.part.add(pos=self.positions) - - def set_fluid_velocity(self): - del self.positions[:] - # Choose the cartesian velocities such that each particle gets the same - # v_r, v_phi and v_z, respectively. - self.v_r = .75 - self.v_phi = 2.5 - self.v_z = 1.5 - node_positions = np.arange(-4.5, 5.0, 1.0) - for i, _ in enumerate(node_positions): - position = np.array( - [node_positions[i], node_positions[i], node_positions[i]]) - - e_z = np.array([0, 0, 1]) - e_r = position - (position * e_z) * e_z - e_r /= np.linalg.norm(e_r) - e_phi = np.cross(e_z, e_r) - - velocity = e_r * self.v_r + e_phi * self.v_phi + e_z * self.v_z - - velocity = self.swap_axis(velocity, self.params['axis']) - position = self.swap_axis(position, self.params['axis']) - position += np.array(self.params['center']) - self.positions.append(position) - self.lbf[np.array(position, dtype=int)].velocity = velocity - - def normalize_with_bin_volume(self, histogram): - bin_volume = tests_common.get_cylindrical_bin_volume( - self.params['n_r_bins'], - self.params['n_phi_bins'], - self.params['n_z_bins'], - self.params['min_r'], - self.params['max_r'], - self.params['min_phi'], - self.params['max_phi'], - self.params['min_z'], - self.params['max_z']) - # Normalization - for i in range(self.params['n_r_bins']): - histogram[i, :, :] /= bin_volume[i] - return histogram - - def LB_fluxdensity_profile_test(self): - self.set_fluid_velocity() - self.set_particles() - # Set up the Observable. 
- local_params = self.params.copy() - if self.params['axis'] == 'x': - local_params['axis'] = [1.0, 0.0, 0.0] - elif self.params['axis'] == 'y': - local_params['axis'] = [0.0, 1.0, 0.0] - else: - local_params['axis'] = [0.0, 0.0, 1.0] - p = espressomd.observables.CylindricalLBFluxDensityProfileAtParticlePositions( - **local_params) - core_hist = p.calculate() - core_hist_v_r = core_hist[:, :, :, 0] - core_hist_v_phi = core_hist[:, :, :, 1] - core_hist_v_z = core_hist[:, :, :, 2] - core_edges = p.call_method("edges") - self.pol_positions = self.pol_coords() + v_r = 0.02 + v_phi = 0.04 + v_z = 0.03 + + def calc_vel_at_pos(self, positions): + """ + In cylindrical coordinates, all velocities are the same. + In cartesian they depend on the position. + The cartesian velocities are calculated here. + """ + + vels = [] + for pos in positions: + e_r, e_phi, e_z = tests_common.get_cylindrical_basis_vectors(pos) + velocity = self.v_r * e_r + self.v_phi * e_phi + self.v_z * e_z + vels.append(velocity) + return vels + + def align_with_observable_frame(self, vec): + """ + Rotate vectors from the original box frame to + the frame of the observables. + """ + + # align original z to observable z + vec = tests_common.rodrigues_rot(vec, [0, 1, 0], np.pi / 2.) + # original x now points along [0,0,-1] + + # align original x to observable orientation + vec = tests_common.rodrigues_rot(vec, [1, 0, 0], np.pi) + return vec + + def setup_system_get_np_hist(self): + """ + Pick positions and velocities in the original box frame and + calculate the np histogram. Then rotate and move the positions + and velocities to the frame of the observables. + After calculating the core observables, the result should be + the same as the np histogram obtained from the original box frame. 
+ """ + + nodes = np.array(np.meshgrid([1, 2], [1, 2], [ + 1, 1, 1, 1, 2])).T.reshape(-1, 3) + positions = nodes + 3 * [0.5] + velocities = self.calc_vel_at_pos(positions) + + # get the histogram from numpy + pos_cyl = [] + for pos in positions: + pos_cyl.append( + tests_common.transform_pos_from_cartesian_to_polar_coordinates(pos)) np_hist, np_edges = tests_common.get_histogram( - self.pol_positions, self.params, 'cylindrical') - np_hist = self.normalize_with_bin_volume(np_hist) - np.testing.assert_array_almost_equal(np_hist * self.v_r, core_hist_v_r) + np.array(pos_cyl), self.params, 'cylindrical') + + # the particles only determine the evaluation points, not the values of + # the observables + np_hist[np.nonzero(np_hist)] = 1 + + # now align the positions and velocities with the frame of reference + # used in the observables + pos_aligned = [] + vel_aligned = [] + for pos, vel in zip(positions, velocities): + pos_aligned.append( + self.align_with_observable_frame(pos) + + self.cyl_transform_params.center) + vel_aligned.append(self.align_with_observable_frame(vel)) + node_aligned = np.array( + np.rint( + np.array(pos_aligned) - + 3 * + [0.5]), + dtype=int) + self.system.part.add(pos=pos_aligned, v=vel_aligned) + self.params['ids'] = self.system.part[:].id + + for node, vel in zip(node_aligned, vel_aligned): + self.lbf[node].velocity = vel + + return np_hist, np_edges + + def check_edges(self, observable, np_edges): + core_edges = observable.call_method("edges") + for core_edge, np_edge in zip(core_edges, np_edges): + np.testing.assert_array_almost_equal(core_edge, np_edge) + + def test_cylindrical_lb_vel_profile_obs(self): + """ + Check that the result from the observable (in its own frame) + matches the np result from the box frame + """ + + np_hist_binary, np_edges = self.setup_system_get_np_hist() + vel_obs = espressomd.observables.CylindricalLBVelocityProfileAtParticlePositions( + **self.params) + core_hist_v = vel_obs.calculate() + core_hist_v_r = 
core_hist_v[:, :, :, 0] + core_hist_v_phi = core_hist_v[:, :, :, 1] + core_hist_v_z = core_hist_v[:, :, :, 2] np.testing.assert_array_almost_equal( - np_hist * self.v_phi, core_hist_v_phi) - np.testing.assert_array_almost_equal(np_hist * self.v_z, core_hist_v_z) - for i in range(3): - np.testing.assert_array_almost_equal(np_edges[i], core_edges[i]) - self.assertEqual(np.prod(p.shape()), len(np_hist.flatten()) * 3) - - def LB_velocity_profile_at_particle_positions_test(self): - self.set_fluid_velocity() - self.set_particles() - # Set up the Observable. - local_params = self.params.copy() - if self.params['axis'] == 'x': - local_params['axis'] = [1.0, 0.0, 0.0] - elif self.params['axis'] == 'y': - local_params['axis'] = [0.0, 1.0, 0.0] - else: - local_params['axis'] = [0.0, 0.0, 1.0] - p = espressomd.observables.CylindricalLBVelocityProfileAtParticlePositions( - **local_params) - core_hist = p.calculate() - core_hist_v_r = core_hist[:, :, :, 0] - core_hist_v_phi = core_hist[:, :, :, 1] - core_hist_v_z = core_hist[:, :, :, 2] - self.pol_positions = self.pol_coords() - np_hist, _ = np.histogramdd( - self.pol_positions, - bins=(self.params['n_r_bins'], - self.params['n_phi_bins'], - self.params['n_z_bins']), - range=[(self.params['min_r'], - self.params['max_r']), - (self.params['min_phi'], - self.params['max_phi']), - (self.params['min_z'], - self.params['max_z'])]) - for x in np.nditer(np_hist, op_flags=['readwrite']): - if x[...] > 0.0: - x[...] /= x[...] 
- np.testing.assert_array_almost_equal(np_hist * self.v_r, core_hist_v_r) + np_hist_binary * self.v_r, core_hist_v_r) np.testing.assert_array_almost_equal( - np_hist * self.v_phi, core_hist_v_phi) - np.testing.assert_array_almost_equal(np_hist * self.v_z, core_hist_v_z) - self.assertEqual(np.prod(p.shape()), len(np_hist.flatten()) * 3) - - def perform_tests(self): - self.LB_fluxdensity_profile_test() - self.LB_velocity_profile_at_particle_positions_test() - - def test_x_axis(self): - self.params['axis'] = 'x' - self.perform_tests() - - def test_y_axis(self): - self.params['axis'] = 'y' - self.perform_tests() - - def test_z_axis(self): - self.params['axis'] = 'z' - self.perform_tests() + np_hist_binary * self.v_phi, core_hist_v_phi) + np.testing.assert_array_almost_equal( + np_hist_binary * self.v_z, core_hist_v_z) + self.check_edges(vel_obs, np_edges) def test_cylindrical_lb_profile_interface(self): - # test setters and getters + """ + Test setters and getters of the script interface + """ + params = self.params.copy() params['n_r_bins'] = 4 params['n_phi_bins'] = 6 @@ -269,25 +217,20 @@ def test_cylindrical_lb_profile_interface(self): self.assertEqual(observable.max_z, 9) obs_bin_edges = observable.bin_edges() np.testing.assert_array_equal(obs_bin_edges[-1, -1, -1], [7, 8, 9]) - # check center - np.testing.assert_array_equal( - np.copy(observable.center), params['center']) - observable.center = [3, 2, 1] - np.testing.assert_array_equal(np.copy(observable.center), [3, 2, 1]) - # check axis - np.testing.assert_array_equal(np.copy(observable.axis), params['axis']) - observable.axis = [6, 5, 4] - np.testing.assert_array_equal(np.copy(observable.axis), [6, 5, 4]) - # check sampling_density - self.assertEqual(observable.sampling_density, 2) - observable.sampling_density = 3 - self.assertEqual(observable.sampling_density, 3) + # check center, axis, orientation + ctp = espressomd.math.CylindricalTransformationParameters( + center=[1, 2, 3], axis=[0, 1, 0], orientation=[0, 
0, 1]) + observable.transform_params = ctp + + for attr_name in ['center', 'axis', 'orientation']: + np.testing.assert_array_almost_equal(np.copy(ctp.__getattr__(attr_name)), + np.copy(observable.transform_params.__getattr__(attr_name))) class CylindricalLBObservableCPU(ut.TestCase, CylindricalLBObservableCommon): def setUp(self): - self.lbf = espressomd.lb.LBFluid(**LB_PARAMS) + self.lbf = espressomd.lb.LBFluid(**self.lb_params) self.system.actors.add(self.lbf) def tearDown(self): @@ -295,16 +238,47 @@ def tearDown(self): self.system.actors.remove(self.lbf) self.system.part.clear() + def test_cylindrical_lb_flux_density_obs(self): + """ + Check that the result from the observable (in its own frame) + matches the np result from the box frame. + Only for CPU because density interpolation is not implemented for GPU LB. + """ + np_hist_binary, np_edges = self.setup_system_get_np_hist() + + flux_obs = espressomd.observables.CylindricalLBFluxDensityProfileAtParticlePositions( + **self.params) + core_hist_fl = flux_obs.calculate() + core_hist_fl_r = core_hist_fl[:, :, :, 0] + core_hist_fl_phi = core_hist_fl[:, :, :, 1] + core_hist_fl_z = core_hist_fl[:, :, :, 2] + + np.testing.assert_array_almost_equal( + np_hist_binary * + self.lb_params['dens'] * + self.v_r, + core_hist_fl_r) + np.testing.assert_array_almost_equal( + np_hist_binary * + self.lb_params['dens'] * + self.v_phi, + core_hist_fl_phi) + np.testing.assert_array_almost_equal( + np_hist_binary * + self.lb_params['dens'] * + self.v_z, + core_hist_fl_z) + self.check_edges(flux_obs, np_edges) + @utx.skipIfMissingGPU() class CylindricalLBObservableGPU(ut.TestCase, CylindricalLBObservableCommon): def setUp(self): - self.lbf = espressomd.lb.LBFluidGPU(**LB_PARAMS) + self.lbf = espressomd.lb.LBFluidGPU(**self.lb_params) self.system.actors.add(self.lbf) def tearDown(self): - del self.positions[:] self.system.actors.remove(self.lbf) self.system.part.clear() diff --git a/testsuite/python/p3m_fft.py 
b/testsuite/python/p3m_fft.py new file mode 100644 index 00000000000..872a4ec59b8 --- /dev/null +++ b/testsuite/python/p3m_fft.py @@ -0,0 +1,129 @@ +# +# Copyright (C) 2020-2021 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import espressomd +import numpy as np +import unittest as ut +import unittest_decorators as utx +import tests_common + +P3M_PARAMS = [ + {'cao': 7, 'r_cut': 3.103065490722656, 'alpha': 1.228153768561588, 'mesh': 48}, + {'cao': 7, 'r_cut': 4.477272033691406, 'alpha': 0.845808585620971, 'mesh': 32}, + {'cao': 7, 'r_cut': 2.393871545791626, 'alpha': 1.599093835130641, 'mesh': 64}, +] + +FFT_PLANS = { + 1: [([1, 1, 1], P3M_PARAMS[1])], + 2: [([2, 1, 1], P3M_PARAMS[1])], + 3: [([3, 1, 1], P3M_PARAMS[0])], + 4: [([2, 2, 1], P3M_PARAMS[1]), + ([4, 1, 1], P3M_PARAMS[2])], + 6: [([3, 2, 1], P3M_PARAMS[0])], + 8: [([2, 2, 2], P3M_PARAMS[1]), + ([4, 2, 1], P3M_PARAMS[2])], +} + + +@utx.skipIfMissingFeatures(["LENNARD_JONES", "P3M"]) +class FFT_test(ut.TestCase): + + system = espressomd.System(box_l=[10., 10., 10.]) + original_node_grid = tuple(system.cell_system.node_grid) + n_nodes = system.cell_system.get_state()["n_nodes"] + + def setUp(self): + self.system.box_l = [10., 10., 10.] 
+ self.system.cell_system.node_grid = self.original_node_grid + self.system.time_step = 0.01 + + def tearDown(self): + self.system.actors.clear() + self.system.part.clear() + + def minimize(self): + self.system.non_bonded_inter[0, 0].lennard_jones.set_params( + epsilon=1.0, sigma=1.0, cutoff=2**(1.0 / 6.0), shift="auto") + self.system.integrator.set_steepest_descent( + f_max=1, gamma=0.01, max_displacement=0.01) + self.system.integrator.run(100) + self.system.integrator.set_vv() + self.system.non_bonded_inter[0, 0].lennard_jones.set_params( + epsilon=0.0, sigma=1.0, cutoff=2) + + def add_charged_particles(self): + np.random.seed(seed=42) + num_pairs = 200 + positions = np.random.random((2 * num_pairs, 3)) + self.system.part.add(pos=positions * self.system.box_l, + q=num_pairs * [-1, 1]) + self.minimize() + + def add_magnetic_particles(self): + np.random.seed(seed=42) + num_part = 200 + positions = np.random.random((num_part, 3)) + dipoles = tests_common.random_dipoles(num_part) + self.system.part.add(pos=positions * self.system.box_l, + dip=dipoles, rotation=num_part * [(1, 1, 1)]) + self.minimize() + + @ut.skipIf(n_nodes not in FFT_PLANS, f"no FFT plan for {n_nodes} threads") + def test_fft_plans(self): + import espressomd.electrostatics + self.system.time_step = 0.01 + self.add_charged_particles() + for node_grid, p3m_params in FFT_PLANS[self.n_nodes]: + self.system.cell_system.node_grid = node_grid + solver = espressomd.electrostatics.P3M( + prefactor=2, accuracy=1e-6, tune=False, **p3m_params) + self.system.actors.add(solver) + ref_energy = -75.871906 + p3m_energy = self.system.analysis.energy()['coulomb'] + self.system.actors.clear() + np.testing.assert_allclose(p3m_energy, ref_energy, rtol=1e-4) + + @utx.skipIfMissingFeatures("P3M") + @ut.skipIf(n_nodes < 2 or n_nodes >= 8, "only runs for 2 <= n_nodes <= 7") + def test_unsorted_node_grid_exception_p3m(self): + import espressomd.electrostatics + self.system.time_step = 0.01 + self.add_charged_particles() + 
unsorted_node_grid = self.system.cell_system.node_grid[::-1] + self.system.cell_system.node_grid = unsorted_node_grid + solver = espressomd.electrostatics.P3M(prefactor=2, accuracy=1e-2) + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: P3M_init: node grid must be sorted, largest first'): + self.system.actors.add(solver) + + @utx.skipIfMissingFeatures("DP3M") + @ut.skipIf(n_nodes < 2 or n_nodes >= 8, "only runs for 2 <= n_nodes <= 7") + def test_unsorted_node_grid_exception_dp3m(self): + import espressomd.magnetostatics + self.system.time_step = 0.01 + self.add_magnetic_particles() + unsorted_node_grid = self.system.cell_system.node_grid[::-1] + self.system.cell_system.node_grid = unsorted_node_grid + solver = espressomd.magnetostatics.DipolarP3M( + prefactor=2, accuracy=1e-2) + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: dipolar P3M_init: node grid must be sorted, largest first'): + self.system.actors.add(solver) + + +if __name__ == "__main__": + ut.main() diff --git a/testsuite/python/p3m_tuning_exceptions.py b/testsuite/python/p3m_tuning_exceptions.py index c3f27d3aec1..e3fbc4c5d65 100644 --- a/testsuite/python/p3m_tuning_exceptions.py +++ b/testsuite/python/p3m_tuning_exceptions.py @@ -34,7 +34,7 @@ def add_charged_particles(self): self.system.part.add(pos=[[0, 0, 0], [.5, .5, .5]], q=[-1, 1]) def add_magnetic_particles(self): - self.system.part.add(pos=[[0, 0, 0], [.5, .5, .5]], + self.system.part.add(pos=[[0.01, 0.01, 0.01], [.5, .5, .5]], rotation=2 * [(1, 1, 1)], dip=2 * [(1, 0, 0)]) ################################################## @@ -49,7 +49,7 @@ def test_01_time_not_set_p3m_gpu(self): self.add_charged_particles() solver = espressomd.electrostatics.P3MGPU(prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_p3m_adaptive_tune: ERROR: time_step not set'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: time_step not set'): self.system.actors.add(solver) 
@utx.skipIfMissingFeatures("P3M") @@ -59,7 +59,7 @@ def test_01_time_not_set_p3m_cpu(self): self.add_charged_particles() solver = espressomd.electrostatics.P3M(prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_p3m_adaptive_tune: ERROR: time_step not set'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: time_step not set'): self.system.actors.add(solver) @utx.skipIfMissingFeatures("DP3M") @@ -70,7 +70,7 @@ def test_01_time_not_set_dp3m_cpu(self): solver = espressomd.magnetostatics.DipolarP3M( prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_dp3m_adaptive_tune: ERROR: time_step not set'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: time_step not set'): self.system.actors.add(solver) ############################################## @@ -85,7 +85,7 @@ def test_02_no_particles_p3m_gpu(self): self.system.time_step = 0.01 solver = espressomd.electrostatics.P3MGPU(prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_p3m_adaptive_tune: ERROR: no charged particles in the system'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: no charged particles in the system'): self.system.actors.add(solver) @utx.skipIfMissingFeatures("P3M") @@ -95,7 +95,7 @@ def test_02_no_particles_p3m_cpu(self): self.system.time_step = 0.01 solver = espressomd.electrostatics.P3M(prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_p3m_adaptive_tune: ERROR: no charged particles in the system'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: no charged particles in the system'): self.system.actors.add(solver) @utx.skipIfMissingFeatures("DP3M") @@ -106,7 +106,7 @@ def test_02_no_particles_dp3m_cpu(self): solver = espressomd.magnetostatics.DipolarP3M( prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_dp3m_adaptive_tune: ERROR: no dipolar particles in the system'): + with 
self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: no dipolar particles in the system'): self.system.actors.add(solver) ####################################### @@ -124,7 +124,7 @@ def test_03_non_cubic_box_p3m_gpu(self): solver = espressomd.electrostatics.P3MGPU( prefactor=2, accuracy=1e-2, epsilon=1) - with self.assertRaisesRegex(Exception, 'python_p3m_adaptive_tune: ERROR: non-metallic epsilon requires cubic box'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: non-metallic epsilon requires cubic box'): self.system.actors.add(solver) @utx.skipIfMissingFeatures("P3M") @@ -137,7 +137,7 @@ def test_03_non_cubic_box_p3m_cpu(self): solver = espressomd.electrostatics.P3M( prefactor=2, accuracy=1e-2, epsilon=1) - with self.assertRaisesRegex(Exception, 'python_p3m_adaptive_tune: ERROR: non-metallic epsilon requires cubic box'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: non-metallic epsilon requires cubic box'): self.system.actors.add(solver) @utx.skipIfMissingFeatures("DP3M") @@ -150,9 +150,108 @@ def test_03_non_cubic_box_dp3m_cpu(self): solver = espressomd.magnetostatics.DipolarP3M( prefactor=2, accuracy=1e-2) - with self.assertRaisesRegex(Exception, 'python_dp3m_adaptive_tune: ERROR: dipolar P3M requires a cubic box'): + with self.assertRaisesRegex(Exception, 'P3M: tuning failed: ERROR: dipolar P3M requires a cubic box'): self.system.actors.add(solver) + ########################################## + # block of tests with invalid parameters # + ########################################## + + def check_invalid_params(self, solver_class, **custom_params): + valid_params = { + 'prefactor': 2, 'accuracy': .01, 'tune': False, 'cao': 1, + 'r_cut': 0.373, 'alpha': 3.81, 'mesh': (8, 8, 8), + 'mesh_off': [-1, -1, -1]} + valid_params.update(custom_params) + + invalid_params = [ + ('cao', 0, 'P3M: invalid cao'), + ('cao', 8, 'P3M: invalid cao'), + ('r_cut', -2.0, 'P3M: invalid r_cut'), + ('alpha', -2.0, 'P3M: invalid 
alpha'), + ('accuracy', -2.0, 'P3M: invalid accuracy'), + ('mesh', (-1, -1, -1), 'P3M: invalid mesh size'), + ('mesh', (0, 0, 0), 'P3M: cao larger than mesh size'), + ('mesh_off', (-2, 1, 1), 'P3M: invalid mesh offset'), + ] + + for key, invalid_value, err_msg in invalid_params: + params = valid_params.copy() + params[key] = invalid_value + solver = solver_class(**params) + with self.assertRaisesRegex(RuntimeError, err_msg): + self.system.actors.add(solver) + self.system.actors.clear() + + @utx.skipIfMissingFeatures("P3M") + def test_04_invalid_params_p3m_cpu(self): + import espressomd.electrostatics + + self.system.time_step = 0.01 + self.add_charged_particles() + + self.check_invalid_params(espressomd.electrostatics.P3M) + + @utx.skipIfMissingGPU() + @utx.skipIfMissingFeatures("P3M") + def test_04_invalid_params_p3m_gpu(self): + import espressomd.electrostatics + + self.system.time_step = 0.01 + self.add_charged_particles() + + self.check_invalid_params(espressomd.electrostatics.P3MGPU, + mesh=3 * [28], alpha=0.3548, r_cut=4.4434) + + @utx.skipIfMissingFeatures("DP3M") + def test_04_invalid_params_dp3m_cpu(self): + import espressomd.magnetostatics + + self.system.time_step = 0.01 + self.add_magnetic_particles() + + self.check_invalid_params(espressomd.magnetostatics.DipolarP3M) + + @utx.skipIfMissingFeatures("P3M") + def test_04_invalid_params_p3m_elc_cpu(self): + import espressomd.electrostatics + + self.system.time_step = 0.01 + self.add_charged_particles() + + solver_p3m = espressomd.electrostatics.P3M( + prefactor=2, accuracy=0.01, tune=False, cao=1, + r_cut=0.373, alpha=3.81, mesh=(8, 8, 8)) + solver_elc = espressomd.electrostatics.ELC( + p3m_actor=solver_p3m, gap_size=1.2 * self.system.box_l[2], + maxPWerror=0.01) + with self.assertRaisesRegex(Exception, "gap size too large"): + self.system.actors.add(solver_elc) + + self.system.actors.clear() + solver_dh = espressomd.electrostatics.DH( + prefactor=1.2, kappa=0.8, r_cut=2.0) + solver_elc = 
espressomd.electrostatics.ELC( + p3m_actor=solver_dh, gap_size=1, maxPWerror=0.01) + with self.assertRaisesRegex(ValueError, "p3m_actor has to be a P3M solver"): + self.system.actors.add(solver_elc) + + @utx.skipIfMissingGPU() + @utx.skipIfMissingFeatures("P3M") + def test_04_invalid_params_p3m_elc_gpu(self): + import espressomd.electrostatics + + self.system.time_step = 0.01 + self.add_charged_particles() + + solver_p3m = espressomd.electrostatics.P3MGPU( + prefactor=2, accuracy=0.01, tune=False, cao=1, + r_cut=4.4434, alpha=0.3548, mesh=(28, 28, 28)) + solver_elc = espressomd.electrostatics.ELC( + p3m_actor=solver_p3m, gap_size=1, maxPWerror=0.01) + with self.assertRaisesRegex(ValueError, "ELC is not set up to work with the GPU P3M"): + self.system.actors.add(solver_elc) + ########################################################### # block of tests where tuning should not throw exceptions # ########################################################### @@ -179,12 +278,20 @@ def test_09_no_errors_p3m_cpu(self): self.system.time_step = 0.01 self.add_charged_particles() - solver = espressomd.electrostatics.P3M(prefactor=2, accuracy=1e-2, - epsilon='metallic') - try: - self.system.actors.add(solver) - except Exception as err: - self.fail('tuning raised Exception("' + str(err) + '")') + solver = espressomd.electrostatics.P3M(prefactor=2, accuracy=0.1) + valid_params = { + 'mesh_off': solver.default_params()['mesh_off'], # sentinel + 'cao': 2, 'r_cut': 3.18, 'mesh': 8} + + # tuning with cao or r_cut or mesh constrained, or without constraints + for key, value in valid_params.items(): + solver = espressomd.electrostatics.P3M( + prefactor=2, accuracy=1e-2, epsilon=0.0, **{key: value}) + try: + self.system.actors.add(solver) + except Exception as err: + self.fail('tuning raised Exception("' + str(err) + '")') + self.system.actors.clear() @utx.skipIfMissingFeatures("DP3M") def test_09_no_errors_dp3m_cpu(self): @@ -194,11 +301,61 @@ def test_09_no_errors_dp3m_cpu(self): 
self.add_magnetic_particles() solver = espressomd.magnetostatics.DipolarP3M( - prefactor=2, accuracy=1e-2) + prefactor=2, accuracy=0.1) + valid_params = { + 'mesh_off': solver.default_params()['mesh_off'], # sentinel + 'cao': 1, 'r_cut': 3.28125, 'mesh': 5} + + # tuning with cao or r_cut or mesh constrained, or without constraints + for key, value in valid_params.items(): + solver = espressomd.magnetostatics.DipolarP3M( + prefactor=2, accuracy=1e-2, **{key: value}) + try: + self.system.actors.add(solver) + except Exception as err: + self.fail('tuning raised Exception("' + str(err) + '")') + self.system.actors.clear() + + @utx.skipIfMissingFeatures("P3M") + def test_09_no_errors_p3m_cpu_rescale_mesh(self): + import espressomd.electrostatics + + self.system.box_l = [10., 15., 20.] + self.system.time_step = 0.01 + self.add_charged_particles() + + solver = espressomd.electrostatics.P3M(prefactor=2, accuracy=1e-2, + epsilon='metallic', + mesh=[8, -1, -1]) + try: + self.system.actors.add(solver) + except Exception as err: + self.fail('tuning raised Exception("' + str(err) + '")') + tuned_mesh = solver.get_params()['mesh'] + self.assertEqual(tuned_mesh[0], 8) + self.assertEqual(tuned_mesh[1], 12) + self.assertEqual(tuned_mesh[2], 16) + + @utx.skipIfMissingGPU() + @utx.skipIfMissingFeatures("P3M") + def test_09_no_errors_p3m_gpu_rescale_mesh(self): + import espressomd.electrostatics + + self.system.box_l = [10., 15., 20.] 
+ self.system.time_step = 0.01 + self.add_charged_particles() + + solver = espressomd.electrostatics.P3MGPU(prefactor=2, accuracy=1e-1, + epsilon='metallic', + mesh=[20, -1, -1]) try: self.system.actors.add(solver) except Exception as err: self.fail('tuning raised Exception("' + str(err) + '")') + tuned_mesh = solver.get_params()['mesh'] + self.assertEqual(tuned_mesh[0], 20) + self.assertEqual(tuned_mesh[1], 30) + self.assertEqual(tuned_mesh[2], 40) if __name__ == "__main__": diff --git a/testsuite/python/save_checkpoint.py b/testsuite/python/save_checkpoint.py index a9b4698dee3..ec7b9996d66 100644 --- a/testsuite/python/save_checkpoint.py +++ b/testsuite/python/save_checkpoint.py @@ -21,6 +21,7 @@ import espressomd import espressomd.checkpointing import espressomd.electrostatics +import espressomd.magnetostatics import espressomd.interactions import espressomd.virtual_sites import espressomd.accumulators @@ -36,8 +37,10 @@ modes = {x for mode in set("@TEST_COMBINATION@".upper().split('-')) for x in [mode, mode.split('.')[0]]} -# use a box with 3 different dimensions +# use a box with 3 different dimensions, unless DipolarP3M is used system = espressomd.System(box_l=[12.0, 14.0, 16.0]) +if 'DP3M' in modes: + system.box_l = 3 * [np.max(system.box_l)] system.cell_system.skin = 0.1 system.time_step = 0.01 system.min_global_cut = 2.0 @@ -89,15 +92,21 @@ ek.add_species(ek_species) system.actors.add(ek) -system.part.add(pos=[1.0] * 3) -system.part.add(pos=[1.0, 1.0, 2.0]) +p1 = system.part.add(pos=[1.0] * 3) +p2 = system.part.add(pos=[1.0, 1.0, 2.0]) + +if espressomd.has_features('ELECTROSTATICS'): + p1.q = 1 + p2.q = -1 + +if espressomd.has_features('DIPOLES'): + p1.dip = (1.3, 2.1, -6) + p2.dip = (7.3, 6.1, -4) if espressomd.has_features('EXCLUSIONS'): system.part.add(pos=[2.0] * 3, exclusions=[0, 1]) -if espressomd.has_features('P3M') and 'P3M.CPU' in modes: - system.part[0].q = 1 - system.part[1].q = -1 +if espressomd.has_features('P3M') and 'P3M' in modes: p3m = 
espressomd.electrostatics.P3M( prefactor=1.0, accuracy=0.1, @@ -106,7 +115,16 @@ alpha=1.0, r_cut=1.0, tune=False) - system.actors.add(p3m) + if 'P3M.CPU' in modes: + system.actors.add(p3m) + elif 'P3M.ELC' in modes: + elc = espressomd.electrostatics.ELC( + p3m_actor=p3m, + gap_size=6.0, + maxPWerror=0.1, + delta_mid_top=0.9, + delta_mid_bot=0.1) + system.actors.add(elc) obs = espressomd.observables.ParticlePositions(ids=[0, 1]) acc_mean_variance = espressomd.accumulators.MeanVarianceCalculator(obs=obs) @@ -117,7 +135,7 @@ acc_mean_variance.update() acc_time_series.update() acc_correlator.update() -system.part[0].pos = [1.0, 2.0, 3.0] +p1.pos = [1.0, 2.0, 3.0] acc_mean_variance.update() acc_time_series.update() acc_correlator.update() @@ -138,7 +156,8 @@ system.box_l, np.ones(3), lambda x: np.linalg.norm(10 * np.ones(3) - x)) checkpoint.register("pot_field_data") system.constraints.add(constraints.PotentialField( - field=pot_field_data, grid_spacing=np.ones(3), default_scale=1.6)) + field=pot_field_data, grid_spacing=np.ones(3), default_scale=1.6, + particle_scales={5: 6.0})) vec_field_data = constraints.ForceField.field_from_fn( system.box_l, np.ones(3), lambda x: 10 * np.ones(3) - x) checkpoint.register("vec_field_data") @@ -181,7 +200,7 @@ if espressomd.has_features(['VIRTUAL_SITES', 'VIRTUAL_SITES_RELATIVE']): system.virtual_sites = espressomd.virtual_sites.VirtualSitesRelative( have_quaternion=True) - system.part[1].vs_auto_relate_to(0) + p2.vs_auto_relate_to(p1) if espressomd.has_features(['LENNARD_JONES']) and 'LJ' in modes: system.non_bonded_inter[0, 0].lennard_jones.set_params( @@ -191,27 +210,67 @@ harmonic_bond = espressomd.interactions.HarmonicBond(r_0=0.0, k=1.0) system.bonded_inter.add(harmonic_bond) -system.part[1].add_bond((harmonic_bond, 0)) +p2.add_bond((harmonic_bond, p1)) if 'THERM.LB' not in modes: thermalized_bond = espressomd.interactions.ThermalizedBond( temp_com=0.0, gamma_com=0.0, temp_distance=0.2, gamma_distance=0.5, r_cut=2, seed=51) 
system.bonded_inter.add(thermalized_bond) - system.part[1].add_bond((thermalized_bond, 0)) + p2.add_bond((thermalized_bond, p1)) checkpoint.register("system") checkpoint.register("acc_mean_variance") checkpoint.register("acc_time_series") checkpoint.register("acc_correlator") # calculate forces system.integrator.run(0) -particle_force0 = np.copy(system.part[0].f) -particle_force1 = np.copy(system.part[1].f) +particle_force0 = np.copy(p1.f) +particle_force1 = np.copy(p2.f) checkpoint.register("particle_force0") checkpoint.register("particle_force1") if espressomd.has_features("COLLISION_DETECTION"): system.collision_detection.set_params( mode="bind_centers", distance=0.11, bond_centers=harmonic_bond) +if espressomd.has_features('DP3M') and 'DP3M' in modes: + dp3m = espressomd.magnetostatics.DipolarP3M( + prefactor=1., + epsilon=2., + mesh_off=[0.5, 0.5, 0.5], + r_cut=2.4, + cao=1, + mesh=[8, 8, 8], + alpha=12, + accuracy=0.01, + tune=False) + system.actors.add(dp3m) + +if espressomd.has_features('SCAFACOS') and 'SCAFACOS' in modes \ + and 'p3m' in espressomd.scafacos.available_methods(): + system.actors.add(espressomd.electrostatics.Scafacos( + prefactor=0.5, + method_name="p3m", + method_params={ + "p3m_r_cut": 1.0, + "p3m_grid": 64, + "p3m_cao": 7, + "p3m_alpha": 2.084652})) + +if espressomd.has_features('SCAFACOS_DIPOLES') and 'SCAFACOS' in modes \ + and 'p2nfft' in espressomd.scafacos.available_methods(): + system.actors.add(espressomd.magnetostatics.Scafacos( + prefactor=1.2, + method_name='p2nfft', + method_params={ + "p2nfft_verbose_tuning": "0", + "pnfft_N": "32,32,32", + "pnfft_n": "32,32,32", + "pnfft_window_name": "bspline", + "pnfft_m": "4", + "p2nfft_ignore_tolerance": "1", + "pnfft_diff_ik": "0", + "p2nfft_r_cut": "11", + "p2nfft_alpha": "0.37"})) + if LB_implementation: m = np.pi / 12 nx = int(np.round(system.box_l[0] / lbf.get_params()["agrid"])) diff --git a/testsuite/python/scafacos_interface.py b/testsuite/python/scafacos_interface.py index 
07d76194bb6..d71790149d6 100644 --- a/testsuite/python/scafacos_interface.py +++ b/testsuite/python/scafacos_interface.py @@ -50,6 +50,9 @@ def test_available_methods(self): for method in available_methods: self.assertIn(method, scafacos_methods) + @ut.skipIf(not espressomd.has_features('SCAFACOS') or + 'p3m' not in espressomd.scafacos.available_methods(), + 'Skipping test: missing ScaFaCoS p3m method') def test_actor_exceptions(self): system = self.system @@ -71,6 +74,9 @@ def test_actor_exceptions(self): prefactor=1, method_name="p3m", method_params={})) system.actors.clear() + @ut.skipIf(not espressomd.has_features('SCAFACOS') or + 'p3m' not in espressomd.scafacos.available_methods(), + 'Skipping test: missing ScaFaCoS p3m method') def test_actor_coulomb(self): system = self.system @@ -79,6 +85,7 @@ def test_actor_coulomb(self): method_name="p3m", method_params={ "p3m_r_cut": 1.0, + "p3m_alpha": 2.799269, "p3m_grid": 32, "p3m_cao": 7})) actor = system.actors[0] @@ -86,9 +93,12 @@ def test_actor_coulomb(self): self.assertEqual(params["prefactor"], 0.5) self.assertEqual(params["method_name"], "p3m") self.assertEqual(params["method_params"], - {'p3m_cao': '7', 'p3m_r_cut': '1.0', 'p3m_grid': '32'}) + {'p3m_cao': '7', 'p3m_r_cut': '1.0', + 'p3m_grid': '32', 'p3m_alpha': '2.799269'}) - @utx.skipIfMissingFeatures(["SCAFACOS_DIPOLES"]) + @ut.skipIf(not espressomd.has_features('SCAFACOS_DIPOLES') or + 'p2nfft' not in espressomd.scafacos.available_methods(), + 'Skipping test: missing ScaFaCoS p2nfft method') def test_actor_dipoles(self): system = self.system @@ -128,6 +138,7 @@ def p3m_data(self): accuracy=1e-5, cao=7, mesh=48, + r_cut=1.88672, epsilon="metallic") system.actors.add(dp3m) @@ -146,11 +157,18 @@ def fcs_data(self): scafacos_coulomb = espressomd.electrostatics.Scafacos( prefactor=0.5, - method_name="p3m", + method_name="p2nfft", method_params={ - "p3m_r_cut": 1.0, - "p3m_grid": 32, - "p3m_cao": 7}) + "p2nfft_verbose_tuning": 0, + "pnfft_N": "32,32,32", + 
"pnfft_n": "32,32,32", + "tolerance_field": "5e-4", + "pnfft_window_name": "bspline", + "pnfft_m": "4", + "p2nfft_ignore_tolerance": "1", + "pnfft_diff_ik": "0", + "p2nfft_r_cut": "1.0", + "p2nfft_alpha": "2.92"}) system.actors.add(scafacos_coulomb) scafacos_dipoles = espressomd.magnetostatics.Scafacos( @@ -178,7 +196,10 @@ def fcs_data(self): return (ref_E_coulomb, ref_E_dipoles, ref_forces, ref_torques) - @utx.skipIfMissingFeatures(["SCAFACOS_DIPOLES", "LENNARD_JONES"]) + @utx.skipIfMissingFeatures("LENNARD_JONES") + @ut.skipIf(not espressomd.has_features('SCAFACOS_DIPOLES') or + 'p2nfft' not in espressomd.scafacos.available_methods(), + 'Skipping test: missing SCAFACOS_DIPOLES or p2nfft method') def test_electrostatics_plus_magnetostatics(self): # check that two instances of ScaFaCoS can be used system = self.system @@ -206,7 +227,7 @@ def test_electrostatics_plus_magnetostatics(self): self.assertAlmostEqual(fcs_E_coulomb, p3m_E_coulomb, delta=1e-4) self.assertAlmostEqual(fcs_E_dipoles, p3m_E_dipoles, delta=1e-4) - np.testing.assert_allclose(fcs_forces, p3m_forces, rtol=1e-3) + np.testing.assert_allclose(fcs_forces, p3m_forces, rtol=1e-2) np.testing.assert_allclose(fcs_torques, p3m_torques, rtol=1e-3) diff --git a/testsuite/python/shapes.py b/testsuite/python/shapes.py index 51e3f034319..063ed0618fe 100644 --- a/testsuite/python/shapes.py +++ b/testsuite/python/shapes.py @@ -27,9 +27,22 @@ def test_Union(self): union = espressomd.shapes.Union() wall1 = espressomd.shapes.Wall(normal=[0, 0, 1], dist=0) wall2 = espressomd.shapes.Wall(normal=[0, 0, -1], dist=-10) + self.assertTrue(union.call_method('empty')) union.add([wall1, wall2]) + self.assertFalse(union.call_method('empty')) self.assertEqual(union.size(), 2) + # check object retrieval + pwall1, pwall2 = union.call_method('get_elements') + self.assertIsInstance(pwall1, espressomd.shapes.Wall) + self.assertIsInstance(pwall2, espressomd.shapes.Wall) + np.testing.assert_almost_equal( + np.copy(pwall1.normal), 
np.copy(wall1.normal)) + np.testing.assert_almost_equal( + np.copy(pwall2.normal), np.copy(wall2.normal)) + np.testing.assert_almost_equal(pwall1.dist, wall1.dist) + np.testing.assert_almost_equal(pwall2.dist, wall2.dist) + self.assertAlmostEqual(union.calc_distance( position=[1, 2, 4.5])[0], 4.5) self.assertAlmostEqual(union.calc_distance( @@ -41,6 +54,7 @@ def test_Union(self): with self.assertRaises(ValueError): union.calc_distance(position=[1, 2, 11.5]) union.clear() + self.assertTrue(union.call_method('empty')) self.assertEqual(union.size(), 0) self.assertEqual(union.calc_distance(position=[1, 2, 6.5])[0], np.inf) diff --git a/testsuite/python/test_checkpoint.py b/testsuite/python/test_checkpoint.py index 9ba36f2e049..34eedc6954f 100644 --- a/testsuite/python/test_checkpoint.py +++ b/testsuite/python/test_checkpoint.py @@ -22,6 +22,9 @@ import espressomd import espressomd.checkpointing +import espressomd.electrostatics +import espressomd.magnetostatics +import espressomd.scafacos import espressomd.virtual_sites import espressomd.integrate from espressomd.shapes import Sphere, Wall @@ -44,6 +47,16 @@ def setUpClass(cls): '.', '__'), checkpoint_path="@CMAKE_CURRENT_BINARY_DIR@") cls.checkpoint.load(0) + cls.ref_box_l = np.array([12.0, 14.0, 16.0]) + if 'DP3M' in modes: + cls.ref_box_l = np.array([16.0, 16.0, 16.0]) + + def get_active_actor_of_type(self, actor_type): + for actor in system.actors.active_actors: + if isinstance(actor, actor_type): + return actor + self.fail( + f"system doesn't have an actor of type {actor_type.__name__}") @ut.skipIf(not LB, "Skipping test due to missing mode.") def test_LB(self): @@ -107,30 +120,20 @@ def test_EK(self): self.assertIn(key, state) self.assertAlmostEqual(reference[key], state[key], delta=1E-5) state_species = ek_species.get_params() - reference_species = {'density': 0.4, 'D': 0.02, 'valency': 0.3} + reference_species = {'density': 0.4, 'D': 0.02, 'valency': 0.3, + 'ext_force_density': [0.01, -0.08, 0.06]} for key in 
reference_species: self.assertIn(key, state_species) - self.assertAlmostEqual( + np.testing.assert_allclose( reference_species[key], state_species[key], - delta=1E-5) - self.assertAlmostEqual( - state_species['ext_force_density'][0], - 0.01, - delta=1E-5) - self.assertAlmostEqual( - state_species['ext_force_density'][1], - -0.08, - delta=1E-5) - self.assertAlmostEqual( - state_species['ext_force_density'][2], - 0.06, - delta=1E-5) + atol=1E-5) def test_variables(self): self.assertEqual(system.cell_system.skin, 0.1) self.assertEqual(system.time_step, 0.01) self.assertEqual(system.min_global_cut, 2.0) + np.testing.assert_allclose(np.copy(system.box_l), self.ref_box_l) def test_part(self): np.testing.assert_allclose( @@ -317,12 +320,93 @@ def test_correlator(self): system.auto_update_accumulators[2].result(), expected) + @utx.skipIfMissingFeatures('DP3M') + @ut.skipIf('DP3M.CPU' not in modes, + "Skipping test due to missing combination.") + def test_dp3m(self): + actor = self.get_active_actor_of_type( + espressomd.magnetostatics.DipolarP3M) + state = actor.get_params() + reference = {'prefactor': 1.0, 'accuracy': 0.01, 'mesh': 3 * [8], + 'cao': 1, 'alpha': 12.0, 'r_cut': 2.4, 'tune': False, + 'mesh_off': [0.5, 0.5, 0.5], 'epsilon': 2.0} + for key in reference: + self.assertIn(key, state) + np.testing.assert_almost_equal(state[key], reference[key], + err_msg=f'for parameter {key}') + @utx.skipIfMissingFeatures('P3M') @ut.skipIf('P3M.CPU' not in modes, "Skipping test due to missing combination.") def test_p3m(self): - self.assertTrue(any(isinstance(actor, espressomd.electrostatics.P3M) - for actor in system.actors.active_actors)) + actor = self.get_active_actor_of_type(espressomd.electrostatics.P3M) + state = actor.get_params() + reference = {'prefactor': 1.0, 'accuracy': 0.1, 'mesh': 3 * [10], + 'cao': 1, 'alpha': 1.0, 'r_cut': 1.0, 'tune': False} + for key in reference: + self.assertIn(key, state) + np.testing.assert_almost_equal(state[key], reference[key], + 
err_msg=f'for parameter {key}') + + @utx.skipIfMissingFeatures('P3M') + @ut.skipIf('P3M.ELC' not in modes, + "Skipping test due to missing combination.") + def test_elc(self): + actor = self.get_active_actor_of_type(espressomd.electrostatics.ELC) + elc_state = actor.get_params() + p3m_state = elc_state['p3m_actor'].get_params() + p3m_reference = {'prefactor': 1.0, 'accuracy': 0.1, 'mesh': 3 * [10], + 'cao': 1, 'alpha': 1.0, 'r_cut': 1.0, 'tune': False} + elc_reference = {'gap_size': 6.0, 'maxPWerror': 0.1, + 'delta_mid_top': 0.9, 'delta_mid_bot': 0.1} + for key in elc_reference: + self.assertIn(key, elc_state) + np.testing.assert_almost_equal(elc_state[key], elc_reference[key], + err_msg=f'for parameter {key}') + for key in p3m_reference: + self.assertIn(key, p3m_state) + np.testing.assert_almost_equal(p3m_state[key], p3m_reference[key], + err_msg=f'for parameter {key}') + + @ut.skipIf(not espressomd.has_features('SCAFACOS') or + 'SCAFACOS' not in modes or + 'p3m' not in espressomd.scafacos.available_methods(), + "Skipping test due to missing combination or p3m method.") + def test_scafacos(self): + actor = self.get_active_actor_of_type( + espressomd.electrostatics.Scafacos) + state = actor.get_params() + reference = {'prefactor': 0.5, 'method_name': 'p3m', + 'method_params': { + 'p3m_cao': '7', + 'p3m_r_cut': '1.0', + 'p3m_grid': '64', + 'p3m_alpha': '2.084652'}} + for key in reference: + self.assertEqual(state[key], reference[key], msg=f'for {key}') + + @ut.skipIf(not espressomd.has_features('SCAFACOS_DIPOLES') or + 'SCAFACOS' not in modes or + 'p2nfft' not in espressomd.scafacos.available_methods(), + "Skipping test due to missing combination or p2nfft method.") + def test_scafacos_dipoles(self): + actor = self.get_active_actor_of_type( + espressomd.magnetostatics.Scafacos) + state = actor.get_params() + reference = {'prefactor': 1.2, 'method_name': 'p2nfft', + 'method_params': { + "p2nfft_verbose_tuning": "0", + "pnfft_N": "32,32,32", + "pnfft_n": "32,32,32", + 
"pnfft_window_name": "bspline", + "pnfft_m": "4", + "p2nfft_ignore_tolerance": "1", + "pnfft_diff_ik": "0", + "p2nfft_r_cut": "11", + "p2nfft_alpha": "0.37"}} + for key in reference: + self.assertIn(key, state) + self.assertEqual(state[key], reference[key], msg=f'for {key}') @utx.skipIfMissingFeatures('COLLISION_DETECTION') def test_collision_detection(self): @@ -350,6 +434,7 @@ def test_constraints(self): self.assertEqual(len(system.constraints), 8 - int(not espressomd.has_features("ELECTROSTATICS"))) c = system.constraints + ref_shape = self.ref_box_l.astype(int) + 2 self.assertIsInstance(c[0].shape, Sphere) self.assertAlmostEqual(c[0].shape.radius, 0.1, delta=1E-10) @@ -370,8 +455,9 @@ def test_constraints(self): self.assertAlmostEqual(c[4].gamma, 2.3, delta=1E-10) self.assertIsInstance(c[5], constraints.PotentialField) - self.assertEqual(c[5].field.shape, (14, 16, 18, 1)) + self.assertEqual(c[5].field.shape, tuple(list(ref_shape) + [1])) self.assertAlmostEqual(c[5].default_scale, 1.6, delta=1E-10) + self.assertAlmostEqual(c[5].particle_scales[5], 6.0, delta=1E-10) np.testing.assert_allclose(np.copy(c[5].origin), [-0.5, -0.5, -0.5]) np.testing.assert_allclose(np.copy(c[5].grid_spacing), np.ones(3)) ref_pot = constraints.PotentialField( @@ -380,7 +466,7 @@ def test_constraints(self): atol=1e-10) self.assertIsInstance(c[6], constraints.ForceField) - self.assertEqual(c[6].field.shape, (14, 16, 18, 3)) + self.assertEqual(c[6].field.shape, tuple(list(ref_shape) + [3])) self.assertAlmostEqual(c[6].default_scale, 1.4, delta=1E-10) np.testing.assert_allclose(np.copy(c[6].origin), [-0.5, -0.5, -0.5]) np.testing.assert_allclose(np.copy(c[6].grid_spacing), np.ones(3)) diff --git a/testsuite/python/tests_common.py b/testsuite/python/tests_common.py index 82b0b386bf5..72bce32f6fc 100644 --- a/testsuite/python/tests_common.py +++ b/testsuite/python/tests_common.py @@ -134,7 +134,7 @@ def abspath(path): def transform_pos_from_cartesian_to_polar_coordinates(pos): - """Transform 
the given cartesian coordinates to polar coordinates. + """Transform the given cartesian coordinates to cylindrical coordinates. Parameters ---------- @@ -167,33 +167,26 @@ def transform_vel_from_cartesian_to_polar_coordinates(pos, vel): (pos[0] * vel[1] - pos[1] * vel[0]) / np.sqrt(pos[0]**2 + pos[1]**2), vel[2]]) +def get_cylindrical_basis_vectors(pos): + phi = transform_pos_from_cartesian_to_polar_coordinates(pos)[1] + e_r = np.array([np.cos(phi), np.sin(phi), 0.]) + e_phi = np.array([-np.sin(phi), np.cos(phi), 0.]) + e_z = np.array([0., 0., 1.]) + return e_r, e_phi, e_z + + def convert_vec_body_to_space(system, part, vec): A = rotation_matrix_quat(system, part) return np.dot(A.transpose(), vec) -def rotation_matrix(axis, theta): +def rodrigues_rot(vec, axis, angle): """ - Return the rotation matrix associated with counterclockwise rotation about - the given axis by theta radians. - - Parameters - ---------- - axis : array_like :obj:`float` - Axis to rotate around. - theta : :obj:`float` - Rotation angle. 
- + https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula#Statement """ - axis = np.asarray(axis) - axis = axis / np.sqrt(np.dot(axis, axis)) - a = np.cos(theta / 2.0) - b, c, d = -axis * np.sin(theta / 2.0) - aa, bb, cc, dd = a * a, b * b, c * c, d * d - bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d - return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], - [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], - [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) + axis /= np.linalg.norm(axis) + return np.cos(angle) * vec + np.sin(angle) * np.cross(axis, vec) + \ + (1 - np.cos(angle)) * np.dot(axis, vec) * axis def rotation_matrix_quat(system, part): @@ -225,55 +218,41 @@ def rotation_matrix_quat(system, part): return A -def get_cylindrical_bin_volume( - n_r_bins, - n_phi_bins, - n_z_bins, - min_r, - max_r, - min_phi, - max_phi, - min_z, - max_z): +def normalize_cylindrical_hist(histogram, cyl_obs_params): """ - Return the bin volumes for a cylindrical histogram. + normalize a histogram in cylindrical coordinates. Helper to test the output + of cylindrical histogram observables Parameters ---------- - n_r_bins : :obj:`float` - Number of bins in ``r`` direction. - n_phi_bins : :obj:`float` - Number of bins in ``phi`` direction. - n_z_bins : :obj:`float` - Number of bins in ``z`` direction. - min_r : :obj:`float` - Minimum considered value in ``r`` direction. - max_r : :obj:`float` - Maximum considered value in ``r`` direction. - min_phi : :obj:`float` - Minimum considered value in ``phi`` direction. - max_phi : :obj:`float` - Maximum considered value in ``phi`` direction. - min_z : :obj:`float` - Minimum considered value in ``z`` direction. - max_z : :obj:`float` - Maximum considered value in ``z`` direction. + histogram : (N,3) array_like of :obj:`float` + The histogram that needs to be normalized + cyl_obs_params : :obj:`dict` + A dictionary containing the common parameters of the cylindrical histogram observables. 
+ Needs to contain the information about number and range of bins. + """ - Returns - ------- - array_like - Bin volumes. + n_r_bins = cyl_obs_params['n_r_bins'] + n_phi_bins = cyl_obs_params['n_phi_bins'] + n_z_bins = cyl_obs_params['n_z_bins'] + min_r = cyl_obs_params['min_r'] + max_r = cyl_obs_params['max_r'] + min_phi = cyl_obs_params['min_phi'] + max_phi = cyl_obs_params['max_phi'] + min_z = cyl_obs_params['min_z'] + max_z = cyl_obs_params['max_z'] - """ bin_volume = np.zeros(n_r_bins) r_bin_size = (max_r - min_r) / n_r_bins phi_bin_size = (max_phi - min_phi) / n_phi_bins z_bin_size = (max_z - min_z) / n_z_bins for i in range(n_r_bins): - bin_volume[i] = np.pi * ((min_r + r_bin_size * (i + 1))**2.0 - - (min_r + r_bin_size * i)**2.0) * \ + bin_volume = np.pi * ((min_r + r_bin_size * (i + 1))**2.0 - + (min_r + r_bin_size * i)**2.0) * \ phi_bin_size / (2.0 * np.pi) * z_bin_size - return bin_volume + histogram[i, :, :] /= bin_volume + + return histogram def get_histogram(pos, obs_params, coord_system, **kwargs): @@ -638,13 +617,6 @@ def gay_berne_potential(r_ij, u_i, u_j, epsilon_0, sigma_0, mu, nu, k_1, k_2): return 4. 
* epsilon * (rr**-12 - rr**-6) -class DynamicDict(dict): - - def __getitem__(self, key): - value = super().__getitem__(key) - return eval(value, self) if isinstance(value, str) else value - - def count_fluid_nodes(lbf): """Counts the non-boundary nodes in the passed lb fluid instance.""" @@ -654,3 +626,14 @@ def count_fluid_nodes(lbf): fluid_nodes += 1 return fluid_nodes + + +def random_dipoles(n_particles): + """Generate random dipoles by sampling Euler angles uniformly at random.""" + cos_theta = 2 * np.random.random(n_particles) - 1 + sin_theta = np.sin(np.arcsin(cos_theta)) + phi = 2 * np.pi * np.random.random(n_particles) + dip = np.array([sin_theta * np.cos(phi), + sin_theta * np.sin(phi), + cos_theta]).T + return dip diff --git a/testsuite/python/utils.py b/testsuite/python/utils.py index dbd6e88d75a..0a19d4b5a94 100644 --- a/testsuite/python/utils.py +++ b/testsuite/python/utils.py @@ -56,7 +56,7 @@ def test_is_valid_type(self): self.assertTrue(utils.is_valid_type( np.array([12], dtype=int)[0], int)) self.assertTrue(utils.is_valid_type( - np.array([12], dtype=np.long)[0], int)) + np.array([12], dtype=int)[0], int)) self.assertTrue(utils.is_valid_type( np.array([1.], dtype=float)[0], float)) self.assertTrue(utils.is_valid_type( diff --git a/testsuite/python/virtual_sites_tracers_common.py b/testsuite/python/virtual_sites_tracers_common.py index 45d7a791348..e36d98a56f0 100644 --- a/testsuite/python/virtual_sites_tracers_common.py +++ b/testsuite/python/virtual_sites_tracers_common.py @@ -108,9 +108,7 @@ def compute_angle(self): n1 = n1 / norm1 n2 = n2 / norm2 - cos_alpha = np.dot(n1, n2) - if cos_alpha > 1: - cos_alpha = 1 + cos_alpha = min(1, np.dot(n1, n2)) alpha = np.arccos(cos_alpha) return alpha diff --git a/testsuite/scripts/samples/test_load_checkpoint.py b/testsuite/scripts/samples/test_load_checkpoint.py index f3904590b4c..a4176768981 100644 --- a/testsuite/scripts/samples/test_load_checkpoint.py +++ 
b/testsuite/scripts/samples/test_load_checkpoint.py @@ -19,21 +19,19 @@ import importlib_wrapper -def shorten_loop(code): - breakpoint = "while True:" - assert breakpoint in code - code = code.replace(breakpoint, "for _ in range(6):", 1) - return code - - sample, skipIfMissingFeatures = importlib_wrapper.configure_and_import( - "@SAMPLES_DIR@/load_checkpoint.py", substitutions=shorten_loop) + "@SAMPLES_DIR@/load_checkpoint.py") @skipIfMissingFeatures class Sample(ut.TestCase): system = sample.system + def test_file_generation(self): + self.assertEqual(set(sample.checkpoint.get_registered_objects()), + {'myvar', 'system', 'p3m'}) + self.assertEqual(sample.myvar, "some script variable (updated value)") + if __name__ == "__main__": ut.main() diff --git a/testsuite/scripts/tutorials/test_active_matter__rectification_simulation.py b/testsuite/scripts/tutorials/test_active_matter__rectification_simulation.py index 5a6ace54591..fe2b21c2dd5 100644 --- a/testsuite/scripts/tutorials/test_active_matter__rectification_simulation.py +++ b/testsuite/scripts/tutorials/test_active_matter__rectification_simulation.py @@ -20,9 +20,11 @@ import os import numpy as np +np.random.seed(40) + tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import( "@TUTORIALS_DIR@/active_matter/solutions/rectification_simulation.py", - cmd_arguments=[6.0], PROD_STEPS=100, PROD_LENGTH=150) + cmd_arguments=[6.0], PROD_STEPS=100, PROD_LENGTH=100) @skipIfMissingFeatures diff --git a/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py b/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py index 85176a36151..d1c17fe33fa 100644 --- a/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py +++ b/testsuite/scripts/tutorials/test_raspberry_electrophoresis.py @@ -18,29 +18,29 @@ import unittest as ut import importlib_wrapper import numpy as np +np.random.seed(42) tutorial, skipIfMissingFeatures = importlib_wrapper.configure_and_import( 
"@TUTORIALS_DIR@/raspberry_electrophoresis/raspberry_electrophoresis.py", - gpu=True, box_l=20., E=0.25, num_iterations=200, num_steps_per_iteration=250) + gpu=True, box_l=20., num_iterations=20, num_steps_per_iteration=20) @skipIfMissingFeatures class Tutorial(ut.TestCase): + '''Check the raspberry travels a longer distance on the x-axis''' system = tutorial.system def test_trajectory_sample(self): trajectory = np.loadtxt('posVsTime_sample.dat')[:, 1:4] - # the raspberry should have traveled mostly on the x-axis - dist = np.abs(trajectory[-1, :] - trajectory[0, :]) - self.assertGreater(dist[0], dist[1]) - self.assertGreater(dist[0], dist[2]) + x, y, z = np.abs(trajectory[-1, :] - trajectory[0, :]) + self.assertGreater(x, y) + self.assertGreater(x, z) def test_trajectory_simulated(self): trajectory = np.loadtxt('posVsTime.dat')[:, 1:4] - # the raspberry should have traveled mostly on the x-axis, - # but due to insufficient sampling, it's not always the case - dist = np.abs(trajectory[-1, :] - trajectory[0, :]) - self.assertGreater(dist[0], np.min(dist[1:])) + x, y, z = np.abs(trajectory[-1, :] - trajectory[0, :]) + self.assertGreater(x, y) + self.assertGreater(x, z) if __name__ == "__main__":