Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test examples in parallel #417

Merged
merged 36 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
61b08cd
Test examples in parallel
JDBetteridge Aug 2, 2023
ad13f33
Add test timeout in case parallel tests deadlock
JDBetteridge Aug 2, 2023
52e18ee
Fix failing Skamarock Klemp test and add PointDataOutput warning/error
JDBetteridge Aug 2, 2023
08b96da
Use GustoIOError throughout io.py
JDBetteridge Aug 3, 2023
4b2c64e
Investigate
JDBetteridge Aug 7, 2023
0303bf7
Move artifact upload to separate step
JDBetteridge Aug 7, 2023
6475897
Try to raise exceptions in parallel
JDBetteridge Aug 11, 2023
ef8419e
Re-enable all tests and cross fingers
JDBetteridge Aug 11, 2023
c051c5b
Add a bunch of debug log messages to SIQN timestepper
JDBetteridge Aug 11, 2023
4004100
Print fewer decimal places in timestep output
JDBetteridge Aug 11, 2023
1bc0b4a
Merge remote-tracking branch 'origin/main' into JDBetteridge/parallel…
tommbendall Oct 8, 2023
70bae42
Remove superfluous debug log line and remove SIQN acronym
JDBetteridge Oct 9, 2023
13f33a7
Disable pytest stdout, increase artefact retention time
JDBetteridge Oct 9, 2023
fce815c
Merge branch 'main' into JDBetteridge/parallel_tests
tommbendall Mar 13, 2024
e31896c
Merge branch 'main' into JDBetteridge/parallel_tests
tommbendall Apr 30, 2024
9b8cecd
Merge branch 'main' into JDBetteridge/parallel_tests
JDBetteridge Jul 11, 2024
8cd30d7
Merge branch 'main' into JDBetteridge/parallel_tests
JDBetteridge Jul 22, 2024
4049fb4
Take maximum of h across all ranks
JDBetteridge Jul 23, 2024
9090bba
Merge remote-tracking branch 'origin/main' into JDBetteridge/parallel…
JDBetteridge Jul 23, 2024
2d4753c
Disable coverage as it is known to cause issues with Python<3.12.4
JDBetteridge Jul 23, 2024
3142f3f
Revert me: Temporarily disable all tests but examples
JDBetteridge Jul 23, 2024
a72bcaa
Revert me: Serialise the example tests
JDBetteridge Jul 23, 2024
af37c3d
Remove redundant setup Python
JDBetteridge Jul 23, 2024
48cfd94
Tidying
JDBetteridge Jul 24, 2024
3c9c361
Change solver parameters
JDBetteridge Jul 24, 2024
01d6a2f
Enough! Just use LU for everything
JDBetteridge Jul 24, 2024
24028bf
Revert "Enough! Just use LU for everything"
JDBetteridge Jul 24, 2024
b478ada
Revert "Change solver parameters"
JDBetteridge Jul 24, 2024
988d5cf
Increase timeouts
JDBetteridge Jul 24, 2024
ebe055b
Re-enable xdist parallel pytest
JDBetteridge Jul 24, 2024
169a31e
Re-enable full test suite
JDBetteridge Jul 24, 2024
1f7befb
Shorten shallow water 1d
JDBetteridge Jul 24, 2024
c1de44d
Merge branch 'main' into JDBetteridge/parallel_tests
JDBetteridge Jul 24, 2024
d9ab409
Try turning off PYOP2 compiler optimisation flags for all examples
JDBetteridge Jul 25, 2024
4da1c00
Merge branch 'JDBetteridge/parallel_tests' of github.com:firedrakepro…
JDBetteridge Jul 25, 2024
b32a08f
Fix environment
JDBetteridge Jul 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,37 @@ jobs:
run: |
cd ..
rm -rf build
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: 3.8
- name: Install Gusto
run: |
. /home/firedrake/firedrake/bin/activate
python -m pip install -r requirements.txt
python -m pip install -e .
python -m pip install \
pytest-cov pytest-timeout pytest-xdist
pytest-timeout pytest-xdist
- name: Gusto tests
run: |
. /home/firedrake/firedrake/bin/activate
which firedrake-clean
firedrake-clean
export GUSTO_PARALLEL_LOG=FILE
python -m pytest \
-n 12 --dist worksteal \
--durations=100 \
--cov gusto \
--timeout=3600 \
--timeout-method=thread \
-o faulthandler_timeout=3660 \
-v unit-tests integration-tests examples
timeout-minutes: 120
- name: Prepare logs
if: always()
run: |
mkdir logs
cd /tmp/pytest-of-firedrake/pytest-0/
find . -name "*.log" -exec cp --parents {} /__w/gusto/gusto/logs/ \;
- name: Upload artifact
if: always()
uses: actions/upload-pages-artifact@v1
with:
name: log-files
path: /__w/gusto/gusto/logs
retention-days: 5

7 changes: 6 additions & 1 deletion examples/shallow_water/shallow_water_1d.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import sys

from firedrake import *
from gusto import *
Expand All @@ -9,6 +10,10 @@
delta = L/n
mesh = PeriodicIntervalMesh(128, L)
dt = 0.0001
if '--running-tests' in sys.argv:
T = 0.0005
else:
T = 1

domain = Domain(mesh, dt, 'CG', 1)

Expand Down Expand Up @@ -61,4 +66,4 @@

D += parameters.H

stepper.run(0, 1)
stepper.run(0, T)
18 changes: 17 additions & 1 deletion examples/test_examples_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import subprocess
import glob
import sys
import os


examples_dir = abspath(dirname(__file__))
Expand All @@ -19,4 +20,19 @@ def test_example_runs(example_file, tmpdir, monkeypatch):
# This ensures that the test writes output in a temporary
# directory, rather than where pytest was run from.
monkeypatch.chdir(tmpdir)
subprocess.check_call([sys.executable, example_file, "--running-tests"])
subprocess.run(
[sys.executable, example_file, "--running-tests"],
check=True,
env=os.environ | {"PYOP2_CFLAGS": "-O0"}
)


def test_example_runs_parallel(example_file, tmpdir, monkeypatch):
    """Run one example script through ``mpiexec -n 4`` and fail on error.

    The example is launched with the current Python interpreter and the
    ``--running-tests`` flag. Because ``check=True`` is passed to
    ``subprocess.run``, a non-zero exit status raises
    ``subprocess.CalledProcessError``, which fails the test.

    Args:
        example_file: Path of the example script to execute.
        tmpdir: Temporary directory used as the working directory.
        monkeypatch: pytest fixture used to change the working directory.
    """
    # Switch into the temporary directory first, so anything the example
    # writes lands there instead of in the directory pytest was started from.
    monkeypatch.chdir(tmpdir)

    launch_command = [
        "mpiexec", "-n", "4",
        sys.executable, example_file, "--running-tests",
    ]
    # The child inherits the current environment, with PYOP2_CFLAGS set to
    # "-O0" (overriding any value that was already present).
    child_env = os.environ | {"PYOP2_CFLAGS": "-O0"}

    subprocess.run(launch_command, check=True, env=child_env)
10 changes: 6 additions & 4 deletions gusto/solvers/linear_solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
from pyop2.profiling import timed_function, timed_region

from gusto.equations.active_tracers import TracerVariableType
from gusto.core.logging import logger, DEBUG, logging_ksp_monitor_true_residual
from gusto.core.logging import (
logger, DEBUG, logging_ksp_monitor_true_residual,
attach_custom_monitor
)
from gusto.core.labels import linearisation, time_derivative, hydrostatic
from gusto.equations import thermodynamics
from gusto.recovery.recovery_kernels import AverageWeightings, AverageKernel
Expand Down Expand Up @@ -56,8 +59,8 @@ def __init__(self, equations, alpha=0.5, solver_parameters=None,
solver_parameters = p
self.solver_parameters = solver_parameters

# ~ if logger.isEnabledFor(DEBUG):
# ~ self.solver_parameters["ksp_monitor_true_residual"] = None
if logger.isEnabledFor(DEBUG):
self.solver_parameters["ksp_monitor_true_residual"] = None

# setup the solver
self._setup_solver()
Expand Down Expand Up @@ -353,7 +356,6 @@ def L_tr(f):
# Log residuals on hybridized solver
self.log_ksp_residuals(self.hybridized_solver.snes.ksp)
# Log residuals on the trace system too
from gusto.core.logging import attach_custom_monitor
python_context = self.hybridized_solver.snes.ksp.pc.getPythonContext()
attach_custom_monitor(python_context, logging_ksp_monitor_true_residual)

Expand Down
16 changes: 10 additions & 6 deletions gusto/timestepping/semi_implicit_quasi_newton.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,12 @@ def timestep(self):
x_after_slow(self.field_name).assign(xn(self.field_name))
if len(self.slow_physics_schemes) > 0:
with timed_stage("Slow physics"):
logger.info('SIQN: Slow physics')
logger.info('Semi-implicit Quasi Newton: Slow physics')
for _, scheme in self.slow_physics_schemes:
scheme.apply(x_after_slow(scheme.field_name), x_after_slow(scheme.field_name))

with timed_stage("Apply forcing terms"):
logger.info('SIQN: Explicit forcing')
logger.info('Semi-implicit Quasi Newton: Explicit forcing')
# Put explicit forcing into xstar
self.forcing.apply(x_after_slow, xn, xstar(self.field_name), "explicit")

Expand All @@ -281,14 +281,14 @@ def timestep(self):
self.io.log_courant(self.fields, 'transporting_velocity',
message=f'transporting velocity, outer iteration {outer}')
for name, scheme in self.active_transport:
logger.info(f'SIQN: Transport {outer}: {name}')
logger.info(f'Semi-implicit Quasi Newton: Transport {outer}: {name}')
# transports a field from xstar and puts result in xp
scheme.apply(xp(name), xstar(name))

x_after_fast(self.field_name).assign(xp(self.field_name))
if len(self.fast_physics_schemes) > 0:
with timed_stage("Fast physics"):
logger.info(f'SIQN: Fast physics {outer}')
logger.info(f'Semi-implicit Quasi Newton: Fast physics {outer}')
for _, scheme in self.fast_physics_schemes:
scheme.apply(x_after_fast(scheme.field_name), x_after_fast(scheme.field_name))

Expand All @@ -300,14 +300,14 @@ def timestep(self):
# TODO: this is where to update the reference state

with timed_stage("Apply forcing terms"):
logger.info(f'SIQN: Implicit forcing {(outer, inner)}')
logger.info(f'Semi-implicit Quasi Newton: Implicit forcing {(outer, inner)}')
self.forcing.apply(xp, xnp1, xrhs, "implicit")

xrhs -= xnp1(self.field_name)
xrhs += xrhs_phys

with timed_stage("Implicit solve"):
logger.info(f'SIQN: Mixed solve {(outer, inner)}')
logger.info(f'Semi-implicit Quasi Newton: Mixed solve {(outer, inner)}')
self.linear_solver.solve(xrhs, dy) # solves linear system and places result in dy

xnp1X = xnp1(self.field_name)
Expand All @@ -320,17 +320,21 @@ def timestep(self):

for name, scheme in self.auxiliary_schemes:
# transports a field from xn and puts result in xnp1
logger.debug(f"Semi-implicit Quasi-Newton auxiliary scheme for {name}")
scheme.apply(xnp1(name), xn(name))

with timed_stage("Diffusion"):
for name, scheme in self.diffusion_schemes:
logger.debug(f"Semi-implicit Quasi-Newton diffusing {name}")
scheme.apply(xnp1(name), xnp1(name))

if len(self.final_physics_schemes) > 0:
with timed_stage("Final Physics"):
for _, scheme in self.final_physics_schemes:
scheme.apply(xnp1(scheme.field_name), xnp1(scheme.field_name))

logger.debug("Leaving Semi-implicit Quasi-Newton timestep method")

def run(self, t, tmax, pick_up=False):
"""
Runs the model for the specified time, from t to tmax.
Expand Down
3 changes: 2 additions & 1 deletion gusto/timestepping/timestepper.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def run(self, t, tmax, pick_up=False):

# Set up dump, which may also include an initial dump
with timed_stage("Dump output"):
logger.debug('Dumping output to disk')
self.io.setup_dump(self.fields, t, pick_up)

self.t.assign(t)
Expand All @@ -197,7 +198,7 @@ def run(self, t, tmax, pick_up=False):
if self.io.output.checkpoint and self.io.output.checkpoint_method == 'dumbcheckpoint':
self.io.chkpt.close()

logger.info(f'TIMELOOP complete. t={float(self.t)}, tmax={tmax}')
logger.info(f'TIMELOOP complete. t={float(self.t):.5f}, {tmax=:.5f}')

def set_reference_profiles(self, reference_profiles):
"""
Expand Down
Loading