From 3f07a87d9fc2e1946d473f8299916683c39b31f9 Mon Sep 17 00:00:00 2001 From: Kacper Derlatka <51274280+Delcior@users.noreply.github.com> Date: Fri, 22 Mar 2024 10:03:52 +0100 Subject: [PATCH] add support for threading and for user-specified MPI dimension. Closes #35 (#98) * mpi: outer, threads:2 * add TODO ids * add TODO label (3D support) * skipping multi-threading tests if JIT disabled * add pylint-disable comment * print number of cores on workers * log info about supported plotting options * debug: print num_threads per worker * debug: omit pylint * debug: omit pylint * hardcore threads * fix issue number * mpi_dim as scenarion ctor argument; new test for mpi_indices; more readable code around send/recv tags for threading * pylint fixes * pylint fixes ++ * pylint disable too-many-args for whole file * check thread number every time * increase workflow timeout * check if the 3rd-order-terms issue indeed is present? * indeed 3rd-order terms cause problems with mpi_dim=INNER :( * increase timeout to 120min * remove 4th worker from the test * increase timeout to 60min * updates to README to reflect threading support changes * removing debug leftover --------- Co-authored-by: Sylwester Arabas Co-authored-by: Sylwester Arabas --- .github/workflows/tests+pypi.yml | 14 +++-- PyMPDATA_MPI/domain_decomposition.py | 19 ++++--- .../impl/boundary_condition_commons.py | 36 ++++++++---- PyMPDATA_MPI/impl/mpi_boundary_condition.py | 24 ++++---- PyMPDATA_MPI/mpi_periodic.py | 5 +- PyMPDATA_MPI/mpi_polar.py | 6 +- README.md | 16 ++---- scenarios/_scenario.py | 20 +++++-- scenarios/cartesian.py | 26 ++++++--- scenarios/spherical.py | 22 +++++-- .../test_single_vs_multi_node.py | 57 ++++++++++++------- .../unit_tests/test_domain_decomposition.py | 33 +++++++++++ tests/local/unit_tests/test_simulation.py | 21 ------- 13 files changed, 190 insertions(+), 109 deletions(-) create mode 100644 tests/local/unit_tests/test_domain_decomposition.py delete mode 100644 tests/local/unit_tests/test_simulation.py diff --git a/.github/workflows/tests+pypi.yml b/.github/workflows/tests+pypi.yml index 0a1578c..b795077 100644 --- a/.github/workflows/tests+pypi.yml +++ b/.github/workflows/tests+pypi.yml @@ -84,7 +84,7 @@ jobs: mpi: [ 'mpich', 'openmpi', 'intelmpi'] python-version: ["3.10"] disable-jit: [1, 0] - mpi-np: [1, 2, 3, 4] + mpi-np: [1, 2, 3] exclude: # as of time of writing, mpi4py/setup-mpi does not support it - platform: macos-latest @@ -100,7 +100,7 @@ jobs: fail-fast: false runs-on: ${{ matrix.platform }} - timeout-minutes: 45 + timeout-minutes: 60 steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v1 @@ -112,13 +112,17 @@ jobs: - if: matrix.mpi == 'mpich' run: echo _ch="ch" >> $GITHUB_ENV - if: startsWith(matrix.platform, 'ubuntu-') - run: sudo apt-get update && sudo apt-get install -y libhdf5-mpi$_ch-dev pkg-config + run: | + sudo apt-get update && sudo apt-get install -y libhdf5-mpi$_ch-dev pkg-config + lscpu - if: startsWith(matrix.platform, 'ubuntu-') && matrix.mpi == 'mpich' run: | echo HDF5_LIBDIR=/usr/lib/x86_64-linux-gnu/hdf5/mpich >> $GITHUB_ENV echo HDF5_INCLUDEDIR=/usr/include/hdf5/mpich >> $GITHUB_ENV - if: startsWith(matrix.platform, 'macos-') - run: brew install hdf5-mpi && echo HDF5_DIR=/opt/homebrew >> $GITHUB_ENV + run: | + brew install hdf5-mpi && echo HDF5_DIR=/opt/homebrew >> $GITHUB_ENV + sysctl -a | grep cpu | grep hw - run: HDF5_MPI="ON" CC=mpicc pip install --no-binary=h5py "git+https://github.com/h5py/h5py@81f6c01#egg=h5py" - run: pip install -e .[tests] - run: 
python -We -c "import PyMPDATA_MPI" @@ -137,7 +141,7 @@ jobs: export COV_ARGS="--cov=./ --cov-report=xml" pip install pytest-cov fi - mpiexec $_mpiexec_args -n ${{ matrix.mpi-np }} pytest $COV_ARGS --timeout=600 --timeout_method=thread -s -vv -We tests/local; + NUMBA_NUM_THREADS=3 mpiexec $_mpiexec_args -n ${{ matrix.mpi-np }} pytest $COV_ARGS --timeout=600 --timeout_method=thread -s -vv -We tests/local; - uses: actions/upload-artifact@v2 with: name: plots diff --git a/PyMPDATA_MPI/domain_decomposition.py b/PyMPDATA_MPI/domain_decomposition.py index f7ded1b..4cb3687 100644 --- a/PyMPDATA_MPI/domain_decomposition.py +++ b/PyMPDATA_MPI/domain_decomposition.py @@ -1,16 +1,17 @@ -# pylint: disable=missing-module-docstring,missing-function-docstring,missing-class-docstring,invalid-name +""" MPI-aware domain decomposition utilities """ import numpy as np from PyMPDATA.impl.domain_decomposition import make_subdomain -from PyMPDATA.impl.enumerations import OUTER - -MPI_DIM = OUTER subdomain = make_subdomain(jit_flags={}) -def mpi_indices(grid, rank, size): - start, stop = subdomain(grid[MPI_DIM], rank, size) - xi, yi = np.indices((stop - start, grid[MPI_DIM - 1]), dtype=float) - xi += start - return xi, yi +def mpi_indices(*, grid, rank, size, mpi_dim): + """returns a mapping from rank-local indices to domain-wide indices, + (subdomain-aware equivalent of np.indices)""" + start, stop = subdomain(grid[mpi_dim], rank, size) + indices_arg = list(grid) + indices_arg[mpi_dim] = stop - start + xyi = np.indices(tuple(indices_arg), dtype=float) + xyi[mpi_dim] += start + return xyi diff --git a/PyMPDATA_MPI/impl/boundary_condition_commons.py b/PyMPDATA_MPI/impl/boundary_condition_commons.py index f84112c..fad1966 100644 --- a/PyMPDATA_MPI/impl/boundary_condition_commons.py +++ b/PyMPDATA_MPI/impl/boundary_condition_commons.py @@ -1,17 +1,18 @@ +# pylint: disable=too-many-arguments """ boundary_condition common functions """ from functools import lru_cache import numba import numba_mpi as mpi -from PyMPDATA.impl.enumerations import INVALID_INDEX +from PyMPDATA.impl.enumerations import INVALID_INDEX, OUTER IRRELEVANT = 666 @lru_cache() def make_scalar_boundary_condition( - indexers, jit_flags, dimension_index, dtype, get_peer + *, indexers, jit_flags, dimension_index, dtype, get_peer, mpi_dim ): """returns fill_halos() function for scalar boundary conditions. Provides default logic for scalar buffer filling. Notable arguments: @@ -26,9 +27,10 @@ def fill_buf(buf, psi, i_rng, k_rng, sign, _dim): (i, INVALID_INDEX, k), psi, sign ) - send_recv = _make_send_recv(indexers.set, jit_flags, fill_buf, dtype, get_peer) + send_recv = _make_send_recv( + indexers.set, jit_flags, fill_buf, dtype, get_peer, mpi_dim + ) - # pylint: disable=too-many-arguments @numba.njit(**jit_flags) def fill_halos(buffer, i_rng, j_rng, k_rng, psi, _, sign): send_recv(buffer, psi, i_rng, j_rng, k_rng, sign, IRRELEVANT, psi) @@ -36,10 +38,9 @@ def fill_halos(buffer, i_rng, j_rng, k_rng, psi, _, sign): return fill_halos -# pylint: disable=too-many-arguments @lru_cache() -def make_vector_boundary_condition( # pylint: disable=too-many-arguments - indexers, halo, jit_flags, dimension_index, dtype, get_peer +def make_vector_boundary_condition( + indexers, halo, jit_flags, dimension_index, dtype, get_peer, mpi_dim ): """returns fill_halos() function for vector boundary conditions. Provides default logic for vector buffer filling. 
Notable arguments: @@ -63,7 +64,9 @@ def fill_buf(buf, components, i_rng, k_rng, sign, dim): buf[i - i_rng.start, k - k_rng.start] = value - send_recv = _make_send_recv(indexers.set, jit_flags, fill_buf, dtype, get_peer) + send_recv = _make_send_recv( + indexers.set, jit_flags, fill_buf, dtype, get_peer, mpi_dim + ) @numba.njit(**jit_flags) def fill_halos_loop_vector(buffer, i_rng, j_rng, k_rng, components, dim, _, sign): @@ -74,10 +77,17 @@ def fill_halos_loop_vector(buffer, i_rng, j_rng, k_rng, components, dim, _, sign return fill_halos_loop_vector -def _make_send_recv(set_value, jit_flags, fill_buf, dtype, get_peer): +def _make_send_recv(set_value, jit_flags, fill_buf, dtype, get_peer, mpi_dim): + @numba.njit(**jit_flags) def get_buffer_chunk(buffer, i_rng, k_rng, chunk_index): chunk_size = len(i_rng) * len(k_rng) + if mpi_dim != OUTER: + n_chunks = len(buffer) // (chunk_size * numba.get_num_threads()) + chunk_index += numba.get_thread_id() * n_chunks + else: + n_chunks = len(buffer) // (chunk_size * 2) + chunk_index += int(numba.get_thread_id() != 0) * n_chunks return buffer.view(dtype)[ chunk_index * chunk_size : (chunk_index + 1) * chunk_size ].reshape((len(i_rng), len(k_rng))) @@ -97,12 +107,16 @@ def fill_output(output, buffer, i_rng, j_rng, k_rng): @numba.njit(**jit_flags) def _send(buf, peer, fill_buf_args): + tag = numba.get_thread_id() fill_buf(buf, *fill_buf_args) - mpi.send(buf, dest=peer) + mpi.send(buf, dest=peer, tag=tag) @numba.njit(**jit_flags) def _recv(buf, peer): - mpi.recv(buf, source=peer) + th_id = numba.get_thread_id() + n_th = numba.get_num_threads() + tag = th_id if mpi_dim != OUTER else {0: n_th - 1, n_th - 1: 0}[th_id] + mpi.recv(buf, source=peer, tag=tag) @numba.njit(**jit_flags) def _send_recv(buffer, psi, i_rng, j_rng, k_rng, sign, dim, output): diff --git a/PyMPDATA_MPI/impl/mpi_boundary_condition.py b/PyMPDATA_MPI/impl/mpi_boundary_condition.py index b6e4042..9d04ed3 100644 --- a/PyMPDATA_MPI/impl/mpi_boundary_condition.py +++ b/PyMPDATA_MPI/impl/mpi_boundary_condition.py @@ -6,10 +6,16 @@ class MPIBoundaryCondition: """common base class for MPI boundary conditions""" - def __init__(self, base, size): + def __init__(self, base, size, mpi_dim): self.__mpi_size_one = size == 1 self.worker_pool_size = size self.base = base + self.mpi_dim = mpi_dim + + @staticmethod + def make_get_peer(_, __): + """returns (lru-cached) numba-compiled callable.""" + raise NotImplementedError() # pylint: disable=too-many-arguments def make_scalar(self, indexers, halo, dtype, jit_flags, dimension_index): @@ -19,14 +25,10 @@ def make_scalar(self, indexers, halo, dtype, jit_flags, dimension_index): indexers, halo, dtype, jit_flags, dimension_index ) return make_scalar_boundary_condition( - indexers, - jit_flags, - dimension_index, - dtype, - self.make_get_peer(jit_flags, self.worker_pool_size), + indexers=indexers, + jit_flags=jit_flags, + dimension_index=dimension_index, + dtype=dtype, + get_peer=self.make_get_peer(jit_flags, self.worker_pool_size), + mpi_dim=self.mpi_dim, ) - - @staticmethod - def make_get_peer(_, __): - """returns (lru-cached) numba-compiled callable.""" - raise NotImplementedError() diff --git a/PyMPDATA_MPI/mpi_periodic.py b/PyMPDATA_MPI/mpi_periodic.py index d4c001c..889b758 100644 --- a/PyMPDATA_MPI/mpi_periodic.py +++ b/PyMPDATA_MPI/mpi_periodic.py @@ -16,13 +16,13 @@ class MPIPeriodic(MPIBoundaryCondition): `PyMPDATA.scalar_field.ScalarField` and `PyMPDATA.vector_field.VectorField` __init__ methods""" - def __init__(self, size): + def __init__(self, size, 
mpi_dim): # passing size insead of using mpi.size() because lack of support for non-default # MPI communicators. https://github.com/numba-mpi/numba-mpi/issues/64 assert SIGN_RIGHT == -1 assert SIGN_LEFT == +1 - super().__init__(size=size, base=Periodic) + super().__init__(size=size, base=Periodic, mpi_dim=mpi_dim) # pylint: disable=too-many-arguments def make_vector(self, indexers, halo, dtype, jit_flags, dimension_index): @@ -38,6 +38,7 @@ def make_vector(self, indexers, halo, dtype, jit_flags, dimension_index): dimension_index, dtype, self.make_get_peer(jit_flags, self.worker_pool_size), + self.mpi_dim, ) @staticmethod diff --git a/PyMPDATA_MPI/mpi_polar.py b/PyMPDATA_MPI/mpi_polar.py index 704ed21..042fa5e 100644 --- a/PyMPDATA_MPI/mpi_polar.py +++ b/PyMPDATA_MPI/mpi_polar.py @@ -7,7 +7,6 @@ from PyMPDATA.boundary_conditions import Polar from PyMPDATA.impl.enumerations import INNER, OUTER -from PyMPDATA_MPI.domain_decomposition import MPI_DIM from PyMPDATA_MPI.impl import MPIBoundaryCondition @@ -16,8 +15,8 @@ class MPIPolar(MPIBoundaryCondition): `PyMPDATA.scalar_field.ScalarField` and `PyMPDATA.vector_field.VectorField` __init__ methods""" - def __init__(self, mpi_grid, grid): - self.worker_pool_size = grid[MPI_DIM] // mpi_grid[MPI_DIM] + def __init__(self, mpi_grid, grid, mpi_dim): + self.worker_pool_size = grid[mpi_dim] // mpi_grid[mpi_dim] self.__mpi_size_one = self.worker_pool_size == 1 if not self.__mpi_size_one: @@ -31,6 +30,7 @@ def __init__(self, mpi_grid, grid): if self.__mpi_size_one else None ), + mpi_dim=mpi_dim, ) @staticmethod diff --git a/README.md b/README.md index 7eeece4..8177458 100644 --- a/README.md +++ b/README.md @@ -58,8 +58,10 @@ Note that the spherical animations below depict simulations without MPDATA corre In the cartesian example below (based on a test case from [Arabas et al. 2014](https://doi.org/10.3233/SPR-140379)), a constant advector field $u$ is used (and $G=1$). MPI (Message Passing Interface) is used - for handling data transfers and synchronisation in the outer dimension, - while multi-threading (using, e.g., OpenMP via Numba) is used in the inner dimension. + for handling data transfers and synchronisation with the domain decomposition + across MPI workers done in either inner or in the outer dimension (user setting). +Multi-threading (using, e.g., OpenMP via Numba) is used for shared-memory parallelisation + within subdomains with further subdomain split across the inner dimension (PyMPDATA logic). In this example, two corrective MPDATA iterations are employed. ### 1 worker @@ -80,14 +82,6 @@ In this example, two corrective MPDATA iterations are employed.

-### 4 workers
-<!-- removed: image grid for the 4-worker animation (markup stripped in this extract) -->
- ## Package architecture ```mermaid @@ -143,7 +137,7 @@ licence: [GPL v3](https://www.gnu.org/licenses/gpl-3.0.html) - MPI support for PyMPDATA implemented externally (i.e., not incurring any overhead or additional dependencies for PyMPDATA users) - MPI calls within Numba njitted code (hence not using `mpi4py`, but leveraging `numba-mpi`) -- hybrid threading (internal in PyMPDATA, in the inner dimension) + MPI (outer dimension) parallelisation +- hybrid domain decomposition parallelisation: threading (internal in PyMPDATA, in the inner dimension) + MPI (either inner or outer dimension) - portability across major OSes (currently Linux & macOS; no Windows support due [challenges in getting HDF5/MPI-IO to work there](https://docs.h5py.org/en/stable/build.html#source-installation-on-windows)) - full test coverage including CI builds asserting on same results with multi-node vs. single-node computations - Continuous Integration with different OSes and different MPI implementation diff --git a/scenarios/_scenario.py b/scenarios/_scenario.py index a1a75d6..24ba10a 100644 --- a/scenarios/_scenario.py +++ b/scenarios/_scenario.py @@ -1,17 +1,20 @@ -# pylint: disable=too-few-public-methods """ Provides base _Scenario base class that every scenario should inherit """ + from PyMPDATA import Solver +from PyMPDATA.impl.enumerations import INNER, OUTER -class _Scenario: +class _Scenario: # pylint: disable=too-few-public-methods """Base class for every Scenario. Provides logic for advance() function""" - def __init__(self, *, stepper, advectee, advector, g_factor=None): + # pylint: disable=too-many-arguments + def __init__(self, *, mpi_dim, stepper, advectee, advector, g_factor=None): + self.mpi_dim = mpi_dim self.solver = Solver( stepper=stepper, advectee=advectee, advector=advector, g_factor=g_factor ) - def advance(self, dataset, output_steps, x_range): + def advance(self, dataset, output_steps, mpi_range): """Logic for performing simulation. 
Returns wall time of one timestep (in clock ticks)""" steps_done = 0 wall_time = 0 @@ -21,5 +24,12 @@ def advance(self, dataset, output_steps, x_range): wall_time_per_timestep = self.solver.advance(n_steps=n_steps) wall_time += wall_time_per_timestep * n_steps steps_done += n_steps - dataset[x_range, :, index] = self.solver.advectee.get() + data = self.solver.advectee.get() + dataset[ + ( + mpi_range if self.mpi_dim == OUTER else slice(None), + mpi_range if self.mpi_dim == INNER else slice(None), + slice(index, index + 1), + ) + ] = data.reshape((data.shape[0], data.shape[1], 1)) return wall_time diff --git a/scenarios/cartesian.py b/scenarios/cartesian.py index 2f129b4..cdeab96 100644 --- a/scenarios/cartesian.py +++ b/scenarios/cartesian.py @@ -4,6 +4,7 @@ from matplotlib import pyplot from PyMPDATA import ScalarField, Stepper, VectorField from PyMPDATA.boundary_conditions import Periodic +from PyMPDATA.impl.enumerations import INNER, OUTER from PyMPDATA_MPI.domain_decomposition import mpi_indices from PyMPDATA_MPI.mpi_periodic import MPIPeriodic @@ -24,16 +25,22 @@ def __init__( # pylint: disable=too-many-arguments rank, size, courant_field_multiplier, + mpi_dim, ): # pylint: disable=too-many-locals, invalid-name halo = mpdata_options.n_halo - xi, yi = mpi_indices(grid, rank, size) - nx, ny = xi.shape + xyi = mpi_indices(grid=grid, rank=rank, size=size, mpi_dim=mpi_dim) + nx, ny = xyi[mpi_dim].shape - boundary_conditions = (MPIPeriodic(size=size), Periodic()) + mpi_periodic = MPIPeriodic(size=size, mpi_dim=mpi_dim) + periodic = Periodic() + boundary_conditions = ( + mpi_periodic if mpi_dim == OUTER else periodic, + mpi_periodic if mpi_dim == INNER else periodic, + ) advectee = ScalarField( - data=self.initial_condition(xi, yi, grid), + data=self.initial_condition(*xyi, grid), halo=mpdata_options.n_halo, boundary_conditions=boundary_conditions, ) @@ -52,11 +59,16 @@ def __init__( # pylint: disable=too-many-arguments n_threads=n_threads, left_first=tuple([rank % 2 == 0] * 2), # TODO #70 (see also https://github.com/open-atmos/PyMPDATA/issues/386) - buffer_size=((ny + 2 * halo) * halo) + buffer_size=( + (ny if mpi_dim == OUTER else nx + 2 * halo) * halo + ) # TODO #38 support for 3D domain * 2 # for temporary send/recv buffer on one side - * 2, # for complex dtype + * 2 # for complex dtype + * (2 if mpi_dim == OUTER else n_threads), + ) + super().__init__( + mpi_dim=mpi_dim, stepper=stepper, advectee=advectee, advector=advector ) - super().__init__(stepper=stepper, advectee=advectee, advector=advector) @staticmethod def initial_condition(xi, yi, grid): diff --git a/scenarios/spherical.py b/scenarios/spherical.py index faf4649..7264cd3 100644 --- a/scenarios/spherical.py +++ b/scenarios/spherical.py @@ -87,7 +87,15 @@ class SphericalScenario(_Scenario): """ def __init__( # pylint: disable=too-many-arguments - self, *, mpdata_options, n_threads, grid, rank, size, courant_field_multiplier + self, + *, + mpi_dim, + mpdata_options, + n_threads, + grid, + rank, + size, + courant_field_multiplier, ): # pylint: disable=too-many-locals,invalid-name self.settings = WilliamsonAndRasch89Settings( @@ -96,7 +104,7 @@ def __init__( # pylint: disable=too-many-arguments output_steps=range(0, 5120 // 3, 100), # original: 5120 ) - xi, _ = mpi_indices(grid, rank, size) + xi, _ = mpi_indices(grid=grid, rank=rank, size=size, mpi_dim=mpi_dim) mpi_nlon, mpi_nlat = xi.shape assert size == 1 or mpi_nlon < self.settings.nlon @@ -105,8 +113,8 @@ def __init__( # pylint: disable=too-many-arguments assert x0 == 
xi[0, 0] boundary_conditions = ( - MPIPeriodic(size=size), - MPIPolar(mpi_grid=(mpi_nlon, mpi_nlat), grid=grid), + MPIPeriodic(size=size, mpi_dim=mpi_dim), + MPIPolar(mpi_grid=(mpi_nlon, mpi_nlat), grid=grid, mpi_dim=mpi_dim), ) advector_x = courant_field_multiplier[0] * np.array( @@ -179,7 +187,11 @@ def __init__( # pylint: disable=too-many-arguments * 2, # for complex dtype ) super().__init__( - stepper=stepper, advectee=advectee, advector=advector, g_factor=g_factor + mpi_dim=mpi_dim, + stepper=stepper, + advectee=advectee, + advector=advector, + g_factor=g_factor, ) def quick_look(self, state): diff --git a/tests/local/contract_tests/test_single_vs_multi_node.py b/tests/local/contract_tests/test_single_vs_multi_node.py index 04988a9..14924c6 100644 --- a/tests/local/contract_tests/test_single_vs_multi_node.py +++ b/tests/local/contract_tests/test_single_vs_multi_node.py @@ -4,12 +4,14 @@ import shutil from pathlib import Path -import mpi4py +import numba import numba_mpi as mpi import numpy as np import pytest from matplotlib import pyplot +from mpi4py import MPI from PyMPDATA import Options +from PyMPDATA.impl.enumerations import INNER, OUTER from PyMPDATA_MPI.domain_decomposition import subdomain from PyMPDATA_MPI.hdf_storage import HDFStorage @@ -23,32 +25,35 @@ {"n_iters": 3}, ) -COURANT_FIELD_MULTIPLIER = ( - (0.5, 0.25), - (-0.5, 0.25), - (0.5, -0.25), - (-0.5, -0.25), -) +COURANT_FIELD_MULTIPLIER = ((0.5, 0.25), (-0.5, 0.25), (0.5, -0.25), (-0.5, -0.25)) + +CARTESIAN_OUTPUT_STEPS = range(0, 2, 1) + +SPHERICAL_OUTPUT_STEPS = range(0, 2000, 100) @pytest.mark.parametrize( - "scenario_class, output_steps", + "scenario_class, output_steps, n_threads", ( - (CartesianScenario, range(0, 24, 2)), - (SphericalScenario, range(0, 2000, 100)), + (CartesianScenario, CARTESIAN_OUTPUT_STEPS, 1), + (CartesianScenario, CARTESIAN_OUTPUT_STEPS, 2), + (CartesianScenario, CARTESIAN_OUTPUT_STEPS, 3), + (SphericalScenario, SPHERICAL_OUTPUT_STEPS, 1), # TODO #56 ), ) @pytest.mark.parametrize("options_kwargs", OPTIONS_KWARGS) -@pytest.mark.parametrize("n_threads", (1,)) # TODO #35 : 2+ @pytest.mark.parametrize("courant_field_multiplier", COURANT_FIELD_MULTIPLIER) -def test_single_vs_multi_node( # pylint: disable=too-many-arguments +@pytest.mark.parametrize("mpi_dim", (INNER, OUTER)) +def test_single_vs_multi_node( # pylint: disable=too-many-arguments,too-many-branches,too-many-statements + *, + mpi_dim, scenario_class, mpi_tmp_path_fixed, options_kwargs, n_threads, courant_field_multiplier, output_steps, - grid=(64, 32), + grid=(64, 32), # TODO #101 ): """ Test is divided into three logical stages. @@ -57,7 +62,6 @@ def test_single_vs_multi_node( # pylint: disable=too-many-arguments Each iteration uses different domain decomposition. 
Last stage is responsible for comparing results to ground truth (which is simulation performed on single node environment) - """ # pylint: disable=too-many-locals if scenario_class is SphericalScenario and options_kwargs["n_iters"] > 1: @@ -66,6 +70,18 @@ def test_single_vs_multi_node( # pylint: disable=too-many-arguments if scenario_class is SphericalScenario and mpi.size() > 2: pytest.skip("TODO #56") + if scenario_class is SphericalScenario and mpi_dim == INNER: + pytest.skip("TODO #56") + + if n_threads > 1 and options_kwargs.get("nonoscillatory", False): + pytest.skip("TODO #99") + + if mpi_dim == INNER and options_kwargs.get("third_order_terms", False): + pytest.skip("TODO #102") + + if n_threads > 1 and numba.config.DISABLE_JIT: # pylint: disable=no-member + pytest.skip("threading requires Numba JIT to be enabled") + plot = True and ( "CI_PLOTS_PATH" in os.environ and courant_field_multiplier == COURANT_FIELD_MULTIPLIER[0] @@ -73,7 +89,6 @@ def test_single_vs_multi_node( # pylint: disable=too-many-arguments options_kwargs == OPTIONS_KWARGS[-1] or scenario_class is SphericalScenario ) ) - # arrange options_str = ( str(options_kwargs) @@ -93,6 +108,7 @@ def test_single_vs_multi_node( # pylint: disable=too-many-arguments dataset_name = "test" # act + numba.set_num_threads(n_threads) for mpi_max_size, path in paths.items(): truncated_size = min(mpi_max_size, mpi.size()) rank = mpi.rank() @@ -122,11 +138,12 @@ def test_single_vs_multi_node( # pylint: disable=too-many-arguments ) with Storage.mpi_context( - path, "r+", mpi4py.MPI.COMM_WORLD.Split(rank < truncated_size, rank) + path, "r+", MPI.COMM_WORLD.Split(rank < truncated_size, rank) ) as storage: dataset = setup_dataset_and_sync_all_workers(storage, dataset_name) if rank < truncated_size: simulation = scenario_class( + mpi_dim=mpi_dim, mpdata_options=Options(**options_kwargs), n_threads=n_threads, grid=grid, @@ -134,16 +151,18 @@ def test_single_vs_multi_node( # pylint: disable=too-many-arguments size=truncated_size, courant_field_multiplier=courant_field_multiplier, ) - x_range = slice(*subdomain(grid[0], rank, truncated_size)) + mpi_range = slice( + *subdomain(grid[simulation.mpi_dim], rank, truncated_size) + ) - simulation.advance(dataset, output_steps, x_range) + simulation.advance(dataset, output_steps, mpi_range) # plot if plot: tmp = np.empty_like(dataset[:, :, -1]) for i, _ in enumerate(output_steps): tmp[:] = np.nan - tmp[x_range, :] = dataset[x_range, :, i] + tmp[:, mpi_range] = dataset[:, mpi_range, i] simulation.quick_look(tmp) filename = f"step={i:04d}.svg" pyplot.savefig(plot_path / filename) diff --git a/tests/local/unit_tests/test_domain_decomposition.py b/tests/local/unit_tests/test_domain_decomposition.py new file mode 100644 index 0000000..7e84bec --- /dev/null +++ b/tests/local/unit_tests/test_domain_decomposition.py @@ -0,0 +1,33 @@ +""" +tests for domain decomposition utilities +""" + +import pytest +from PyMPDATA.impl.enumerations import INNER, OUTER + +from PyMPDATA_MPI.domain_decomposition import mpi_indices + + +@pytest.mark.parametrize( + "grid, rank, size, mpi_dim, expected", + ( + # size=1 + ((2, 2), 0, 1, OUTER, [[[0, 0], [1, 1]], [[0, 1], [0, 1]]]), + ((2, 2), 0, 1, INNER, [[[0, 0], [1, 1]], [[0, 1], [0, 1]]]), + # size=2 + ((2, 2), 0, 2, OUTER, [[[0, 0]], [[0, 1]]]), + ((2, 2), 1, 2, OUTER, [[[1, 1]], [[0, 1]]]), + ((2, 2), 0, 2, INNER, [[[0], [1]], [[0], [0]]]), + ((2, 2), 1, 2, INNER, [[[0], [1]], [[1], [1]]]), + ), +) +def test_mpi_indices(grid, rank, size, mpi_dim, expected): + """tests the 
subdomain-aware index-generation logic""" + # arrange + sut = mpi_indices + + # act + xyi = sut(grid=grid, rank=rank, size=size, mpi_dim=mpi_dim) + + # assert + assert (xyi == expected).all() diff --git a/tests/local/unit_tests/test_simulation.py b/tests/local/unit_tests/test_simulation.py deleted file mode 100644 index c049e2e..0000000 --- a/tests/local/unit_tests/test_simulation.py +++ /dev/null @@ -1,21 +0,0 @@ -# pylint: disable=missing-module-docstring,missing-function-docstring,missing-class-docstring,invalid-name - -import pytest - -from PyMPDATA_MPI.domain_decomposition import mpi_indices - - -@pytest.mark.parametrize( - "grid, rank, size, expected", - ( - ((2, 3), 0, 1, [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), - ((2, 3), 0, 2, [[0.0, 0.0, 0.0]]), - ((2, 3), 1, 2, [[1.0, 1.0, 1.0]]), - ((3, 2), 0, 1, [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]), - ((3, 2), 0, 2, [[0.0, 0.0], [1.0, 1.0]]), - ((3, 2), 1, 2, [[2.0, 2.0]]), - ), -) -def test_mpi_indices(grid, rank, size, expected): - xi, _ = mpi_indices(grid, rank, size) - assert (xi == expected).all()
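
The new `tests/local/unit_tests/test_domain_decomposition.py` above exercises the reworked `mpi_indices()` from `PyMPDATA_MPI/domain_decomposition.py`. The standalone sketch below mirrors that logic outside the package: `equal_split()` is a hypothetical stand-in for the `subdomain()` callable produced by PyMPDATA's `make_subdomain()`, of which the patch only relies on the `(start, stop)` return contract, and the `OUTER`/`INNER` constants mirror `PyMPDATA.impl.enumerations`.

```python
# Minimal sketch of the index logic exercised by the new unit test.
# equal_split() is an assumed stand-in for PyMPDATA's subdomain().
import numpy as np

OUTER, INNER = 0, -1  # mirrors PyMPDATA.impl.enumerations


def equal_split(span, rank, size):
    """hypothetical block split of `span` grid points over `size` ranks,
    returning (start, stop) for the given rank"""
    bounds = np.linspace(0, span, size + 1, dtype=int)
    return bounds[rank], bounds[rank + 1]


def mpi_indices_sketch(*, grid, rank, size, mpi_dim):
    """rank-local -> domain-wide index mapping, as in the patched
    PyMPDATA_MPI.domain_decomposition.mpi_indices()"""
    start, stop = equal_split(grid[mpi_dim], rank, size)
    shape = list(grid)
    shape[mpi_dim] = stop - start   # only the MPI dimension is shortened per rank
    xyi = np.indices(tuple(shape), dtype=float)
    xyi[mpi_dim] += start           # shift local indices back to global coordinates
    return xyi


# reproduces one of the new test cases: grid=(2, 2), 2 ranks, decomposition on INNER
assert (
    mpi_indices_sketch(grid=(2, 2), rank=1, size=2, mpi_dim=INNER)
    == [[[0], [1]], [[1], [1]]]
).all()
```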
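
The other core change of the patch is the per-thread MPI message tagging added to `_make_send_recv()` in `PyMPDATA_MPI/impl/boundary_condition_commons.py`. The plain-Python sketch below (no Numba or MPI calls) restates only the tag-pairing rule visible in the diff: senders tag messages with their thread id; receivers use their own id when the MPI decomposition is along the inner dimension, and cross-pair the two edge threads when it is along the outer dimension. The reading that, with PyMPDATA's thread split along the outer dimension, a rank's left-edge halo is produced by the peer's last thread (hence the `0 <-> n_th - 1` swap) is an interpretation, not something stated in the patch.

```python
# Illustration of the send/recv tag scheme added in _make_send_recv();
# thread ids stand in for numba.get_thread_id(), constants mirror PyMPDATA enumerations.
OUTER, INNER = 0, -1


def send_tag(thread_id):
    # each sending thread tags its halo message with its own thread id
    return thread_id


def recv_tag(thread_id, num_threads, mpi_dim):
    # INNER decomposition: every thread exchanges with the same-numbered peer thread;
    # OUTER decomposition: only the two edge threads participate, cross-paired
    if mpi_dim != OUTER:
        return thread_id
    return {0: num_threads - 1, num_threads - 1: 0}[thread_id]


# with 3 threads and OUTER decomposition, receiver thread 0 waits for the message
# sent by the peer's thread 2, and vice versa
assert recv_tag(0, 3, OUTER) == send_tag(2)
assert recv_tag(2, 3, OUTER) == send_tag(0)
# with INNER decomposition every thread is paired with its same-numbered counterpart
assert all(recv_tag(i, 3, INNER) == send_tag(i) for i in range(3))
```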