[Py2F]: Enable Py2F GPU tests on CI (#471)

Enable GPU integration tests for Py2F
C2SM · Jun 18, 2024 · 8cb8ee7 · 8cb8ee7
1 parent 0efbeeb
commit 8cb8ee7
Show file tree

Hide file tree

Showing 11 changed files with 251 additions and 341 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -23,6 +23,7 @@ htmlcov/
 .nox/
 .pytest_cache
 .mypy_cache
+.ruff_cache
 
 # Distribution / packaging
 *.egg-info/
@@ -41,3 +42,6 @@ Thumbs.db
 
 # Git directory
 .git/
+
+# serialized data
+testdata
diff --git a/ci/base.yml b/ci/base.yml
@@ -12,14 +12,14 @@ stages:
 
 variables:
   PYTHON_VERSION: "3.10"
-  PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/icon4py:$CI_COMMIT_SHORT_SHA
+  PERSIST_IMAGE_NAME: "${CSCS_REGISTRY_PATH}/icon4py:${CI_COMMIT_SHORT_SHA}"
 
 .build_template:
   stage: build
   extends: .container-builder-cscs-zen2
   variables:
     DOCKERFILE: ci/docker/Dockerfile.build
-    DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION"]'
+    DOCKER_BUILD_ARGS: '["PYVERSION=${PYVERSION}"]'
     <<: *py310
 
 .test_template:
@@ -33,7 +33,7 @@ variables:
     - pyversion_no_dot="${PYTHON_VERSION//./}"
     - pip install tox clang-format
     - python -c "import cupy"
-    - ls ${TEST_DATA_PATH}
+    - ls "${TEST_DATA_PATH}"
   variables:
     SLURM_JOB_NUM_NODES: 1
     SLURM_NTASKS: 1
@@ -43,4 +43,8 @@ variables:
     VIRTUALENV_SYSTEM_SITE_PACKAGES: 1
     CSCS_NEEDED_DATA: icon4py
     TEST_DATA_PATH: "/project/d121/icon4py/ci/testdata"
-    ICON_GRID_LOC: "/project/d121/icon4py/ci/testdata/grids/mch_ch_r04b09_dsl"
+    ICON_GRID_LOC: "${TEST_DATA_PATH}/grids/mch_ch_r04b09_dsl"
+    PY2F_GPU_TESTS: 1
+    HPC_SDK_PATH: "/opt/nvidia/hpc_sdk/Linux_x86_64/22.11"
+    CUDACXX: "${HPC_SDK_PATH}/compilers/bin/nvcc"
+    NVFORTRAN_COMPILER: "${HPC_SDK_PATH}/compilers/bin/nvfortran"
diff --git a/ci/docker/Dockerfile.build b/ci/docker/Dockerfile.build
@@ -1,4 +1,5 @@
-FROM docker.io/nvidia/cuda:11.2.2-devel-ubuntu20.04
+FROM ubuntu:20.04
+
 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
 
@@ -29,18 +30,39 @@ RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \
     htop && \
     rm -rf /var/lib/apt/lists/*
 
-RUN wget --quiet -O boost_1_72_0.tar.gz https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.gz/download && \
+# Install NVIDIA HPC SDK for nvfortran
+ARG HPC_SDK_VERSION=22.11
+ARG HPC_SDK_NAME=nvhpc_2022_2211_Linux_x86_64_cuda_11.8
+ARG HPC_SDK_URL=https://developer.download.nvidia.com/hpc-sdk/22.11/${HPC_SDK_NAME}.tar.gz
+
+RUN wget -q ${HPC_SDK_URL} -O /tmp/nvhpc.tar.gz && \
+    mkdir -p /opt/nvidia && \
+    tar -xzf /tmp/nvhpc.tar.gz -C /opt/nvidia && \
+    rm /tmp/nvhpc.tar.gz
+
+ENV NVHPC_DEFAULT_CUDA=11.8
+ENV NVHPC_SILENT=1
+RUN cd /opt/nvidia/${HPC_SDK_NAME} && ./install
+
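+# Point HPC_SDK_PATH at the installed SDK and add its compilers, MPI tools, man pages and CUDA/math libraries to the search paths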
+ENV HPC_SDK_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/${HPC_SDK_VERSION}
+
+ENV PATH=${HPC_SDK_PATH}/compilers/bin:${HPC_SDK_PATH}/comm_libs/mpi/bin:${PATH} \
+    MANPATH=${HPC_SDK_PATH}/compilers/man:${MANPATH} \
+    LD_LIBRARY_PATH=${HPC_SDK_PATH}/cuda/lib64:${HPC_SDK_PATH}/math_libs/lib64:${LD_LIBRARY_PATH}
+
+# Install Boost
+RUN wget -q -O boost_1_72_0.tar.gz https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.gz/download && \
     echo c66e88d5786f2ca4dbebb14e06b566fb642a1a6947ad8cc9091f9f445134143f boost_1_72_0.tar.gz > boost_hash.txt && \
     sha256sum -c boost_hash.txt && \
     tar xzf boost_1_72_0.tar.gz && \
     mv boost_1_72_0/boost /usr/local/include/ && \
     rm boost_1_72_0.tar.gz boost_hash.txt
 
 ENV BOOST_ROOT /usr/local/
-ENV CUDA_HOME /usr/local/cuda
-
-ARG PYVERSION
 
+# Install pyenv and Python version specified by PYVERSION
+ENV PYVERSION 3.10.9
 RUN curl https://pyenv.run | bash
 
 ENV PYENV_ROOT /root/.pyenv
@@ -54,6 +76,6 @@ RUN pyenv update && \
 
 ENV PATH="/root/.pyenv/shims:${PATH}"
 
+# Install Python packages
 COPY . /icon4py
-
-RUN pip install --upgrade pip setuptools wheel tox cupy-cuda11x clang-format
+RUN pip install --upgrade pip setuptools wheel tox clang-format cupy-cuda11x
diff --git a/tools/src/icon4pytools/py2fgen/template.py b/tools/src/icon4pytools/py2fgen/template.py
@@ -219,6 +219,7 @@ class PythonWrapperGenerator(TemplatedGenerator):
 {% if _this_node.backend == 'GPU' %}import cupy as cp {% endif %}
 from numpy.typing import NDArray
 from gt4py.next.iterator.embedded import np_as_located_field
+from gt4py.next.ffront.fbuiltins import int32
 from icon4py.model.common.settings import xp
 
 {% if _this_node.is_gt4py_program_present %}

diff --git a/tools/src/icon4pytools/py2fgen/wrappers/simple.py b/tools/src/icon4pytools/py2fgen/wrappers/simple.py
@@ -16,12 +16,11 @@
 
 from gt4py.next.common import GridType
 from gt4py.next.ffront.decorator import field_operator, program
-from gt4py.next.ffront.fbuiltins import Field, float64, int32, neighbor_sum
+from gt4py.next.ffront.fbuiltins import Field, float64
 from icon4py.model.common.caching import CachedProgram
-from icon4py.model.common.dimension import C2CE, C2E, C2EDim, CEDim, CellDim, EdgeDim, KDim
+from icon4py.model.common.dimension import CellDim, KDim
 from icon4py.model.common.grid.simple import SimpleGrid
 from icon4py.model.common.settings import backend
-from icon4py.model.common.type_alias import wpfloat
 
 
 # global profiler object
@@ -55,94 +54,18 @@ def square(
     _square(inp, out=result)
 
 
+square_cached = CachedProgram(square, with_domain=False)
+
+
 def square_from_function(
     inp: Field[[CellDim, KDim], float64],
     result: Field[[CellDim, KDim], float64],
 ):
-    square(inp, result, offset_provider={})
-
-
-@field_operator
-def _multi_return(
-    z_vn_avg: Field[[EdgeDim, KDim], wpfloat],
-    mass_fl_e: Field[[EdgeDim, KDim], wpfloat],
-    vn_traj: Field[[EdgeDim, KDim], wpfloat],
-    mass_flx_me: Field[[EdgeDim, KDim], wpfloat],
-    geofac_div: Field[[CEDim], wpfloat],
-    z_nabla2_e: Field[[EdgeDim, KDim], wpfloat],
-    r_nsubsteps: wpfloat,
-) -> tuple[Field[[EdgeDim, KDim], wpfloat], Field[[EdgeDim, KDim], wpfloat]]:
-    """accumulate_prep_adv_fields stencil formerly known as _mo_solve_nonhydro_stencil_34."""
-    vn_traj_wp = vn_traj + r_nsubsteps * z_vn_avg
-    mass_flx_me_wp = mass_flx_me + r_nsubsteps * mass_fl_e
-    z_temp_wp = neighbor_sum(z_nabla2_e(C2E) * geofac_div(C2CE), axis=C2EDim)  # noqa: F841
-    return vn_traj_wp, mass_flx_me_wp
-
-
-@program(grid_type=GridType.UNSTRUCTURED, backend=backend)
-def multi_return(
-    z_vn_avg: Field[[EdgeDim, KDim], wpfloat],
-    mass_fl_e: Field[[EdgeDim, KDim], wpfloat],
-    vn_traj: Field[[EdgeDim, KDim], wpfloat],
-    mass_flx_me: Field[[EdgeDim, KDim], wpfloat],
-    geofac_div: Field[[CEDim], wpfloat],
-    z_nabla2_e: Field[[EdgeDim, KDim], wpfloat],
-    r_nsubsteps: wpfloat,
-    horizontal_start: int32,
-    horizontal_end: int32,
-    vertical_start: int32,
-    vertical_end: int32,
-):
-    _multi_return(
-        z_vn_avg,
-        mass_fl_e,
-        vn_traj,
-        mass_flx_me,
-        geofac_div,
-        z_nabla2_e,
-        r_nsubsteps,
-        out=(vn_traj, mass_flx_me),
-        domain={
-            EdgeDim: (horizontal_start, horizontal_end),
-            KDim: (vertical_start, vertical_end),
-        },
-    )
+    square_cached(inp, result, offset_provider={})
 
 
 def square_error(
     inp: Field[[CellDim, KDim], float64],
     result: Field[[CellDim, KDim], float64],
 ):
     raise Exception("Exception foo occurred")
-
-
-multi_return_cached = CachedProgram(multi_return)
-
-
-def multi_return_from_function(
-    z_vn_avg: Field[[EdgeDim, KDim], wpfloat],
-    mass_fl_e: Field[[EdgeDim, KDim], wpfloat],
-    vn_traj: Field[[EdgeDim, KDim], wpfloat],
-    mass_flx_me: Field[[EdgeDim, KDim], wpfloat],
-    geofac_div: Field[[CEDim], wpfloat],
-    z_nabla2_e: Field[[EdgeDim, KDim], wpfloat],
-    r_nsubsteps: wpfloat,
-    horizontal_start: int32,
-    horizontal_end: int32,
-    vertical_start: int32,
-    vertical_end: int32,
-):
-    multi_return_cached(
-        z_vn_avg,
-        mass_fl_e,
-        vn_traj,
-        mass_flx_me,
-        geofac_div,
-        z_nabla2_e,
-        r_nsubsteps,
-        horizontal_start,
-        horizontal_end,
-        vertical_start,
-        vertical_end,
-        offset_provider=grid.offset_providers,
-    )
diff --git a/tools/tests/py2fgen/fortran_samples/test_diffusion.f90 b/tools/tests/py2fgen/fortran_samples/test_diffusion.f90
@@ -101,20 +101,20 @@ program diffusion_simulation
    integer(c_int) :: n
 
    ! Constants and types
-   integer(c_int), parameter :: num_cells = 20480
-   integer(c_int), parameter :: num_edges = 30720
-   integer(c_int), parameter :: num_vertices = 10242
-   integer(c_int), parameter :: num_levels = 60
+   integer(c_int), parameter :: num_cells = 20896
+   integer(c_int), parameter :: num_edges = 31558
+   integer(c_int), parameter :: num_vertices = 10663
+   integer(c_int), parameter :: num_levels = 65
    integer(c_int), parameter :: num_c2ec2o = 4
    integer(c_int), parameter :: num_v2e = 6
-   integer(c_int), parameter :: num_c2e = 2
+   integer(c_int), parameter :: num_c2e = 3
    integer(c_int), parameter :: num_e2c2v = 4
    integer(c_int), parameter :: num_c2e2c = 3
-   integer(c_int), parameter :: num_e2c = 3
+   integer(c_int), parameter :: num_e2c = 2
    real(c_double), parameter :: mean_cell_area = 24907282236.708576
    integer(c_int), parameter :: ndyn_substeps = 2
-   real(c_double), parameter :: dtime = 2.0
-   real(c_double), parameter :: rayleigh_damping_height = 50000
+   real(c_double), parameter :: dtime = 10.0
+   real(c_double), parameter :: rayleigh_damping_height = 12500.0
    integer(c_int), parameter :: nflatlev = 30
    integer(c_int), parameter :: nflat_gradp = 59
    integer(c_int), parameter :: diffusion_type = 5 ! Assuming DiffusionType.SMAGORINSKY_4TH_ORDER is represented by 5
@@ -127,6 +127,11 @@ program diffusion_simulation
    real(c_double), parameter :: smagorinski_scaling_factor = 0.025
    logical(c_int), parameter :: hdiff_temp = .true.
    logical(c_int), parameter :: linit = .false.
+   real(c_double), parameter :: denom_diffu_v = 150.0
+   real(c_double), parameter :: thslp_zdiffu = 0.02
+   real(c_double), parameter :: thhgtd_zdiffu = 125.0
+   integer(c_int), parameter :: itype_sher = 2
+   real(c_double), parameter :: nudge_max_coeff = 0.075
 
    ! Declaring arrays for diffusion_init and diffusion_run
    real(c_double), dimension(:), allocatable :: vct_a
@@ -224,7 +229,7 @@ program diffusion_simulation
 
    ! Fill arrays with random numbers
    ! For 1D arrays
-   call fill_random_1d(vct_a, 0.0_c_double, 75000.0_c_double) ! needs to be above 50000 damping height restriction
+   call fill_random_1d(vct_a, 0.0_c_double, 75000.0_c_double) ! upper bound must exceed rayleigh_damping_height (12500)
    call fill_random_1d(nudgecoeff_e, 0.0_c_double, 1.0_c_double)
    call fill_random_1d(tangent_orientation, 0.0_c_double, 1.0_c_double)
    call fill_random_1d(inverse_primal_edge_lengths, 0.0_c_double, 1.0_c_double)
@@ -281,30 +286,71 @@ program diffusion_simulation
     !$acc mask_hdiff)
 
    ! Call diffusion_init
-   call diffusion_init(vct_a, theta_ref_mc, wgtfac_c, e_bln_c_s, geofac_div, &
-                       geofac_grg_x, geofac_grg_y, geofac_n2s, nudgecoeff_e, rbf_coeff_1, &
-                       rbf_coeff_2, mask_hdiff, zd_diffcoef, zd_vertoffset, zd_intcoef, &
-                       num_levels, mean_cell_area, ndyn_substeps, rayleigh_damping_height, &
-                       nflatlev, nflat_gradp, diffusion_type, &
-                       hdiff_w, hdiff_vn, zdiffu_t, type_t_diffu, type_vn_diffu, &
-                       hdiff_efdt_ratio, smagorinski_scaling_factor, hdiff_temp, &
-                       tangent_orientation, inverse_primal_edge_lengths, inv_dual_edge_length, &
-                       inv_vert_vert_length, edge_areas, f_e, cell_areas, primal_normal_vert_x, &
-                       primal_normal_vert_y, dual_normal_vert_x, dual_normal_vert_y, &
-                       primal_normal_cell_x, primal_normal_cell_y, dual_normal_cell_x, &
-                       dual_normal_cell_y, rc)
+   call diffusion_init(vct_a, &
+                      theta_ref_mc, &
+                      wgtfac_c, &
+                      e_bln_c_s, &
+                      geofac_div, &
+                      geofac_grg_x, &
+                      geofac_grg_y, &
+                      geofac_n2s, &
+                      nudgecoeff_e, &
+                      rbf_coeff_1, &
+                      rbf_coeff_2, &
+                      mask_hdiff, &
+                      zd_diffcoef, &
+                      zd_vertoffset, &
+                      zd_intcoef, &
+                      num_levels, &
+                      mean_cell_area, &
+                      ndyn_substeps, &
+                      rayleigh_damping_height, &
+                      nflatlev, &
+                      nflat_gradp, &
+                      diffusion_type, &
+                      hdiff_w, &
+                      hdiff_vn, &
+                      zdiffu_t, &
+                      type_t_diffu, &
+                      type_vn_diffu, &
+                      hdiff_efdt_ratio, &
+                      smagorinski_scaling_factor, &
+                      hdiff_temp, &
+                      thslp_zdiffu, &
+                      thhgtd_zdiffu, &
+                      denom_diffu_v, &
+                      nudge_max_coeff, &
+                      itype_sher, &
+                      tangent_orientation, &
+                      inverse_primal_edge_lengths, &
+                      inv_dual_edge_length, &
+                      inv_vert_vert_length, &
+                      edge_areas, &
+                      f_e, &
+                      cell_areas, &
+                      primal_normal_vert_x, &
+                      primal_normal_vert_y, &
+                      dual_normal_vert_x, &
+                      dual_normal_vert_y, &
+                      primal_normal_cell_x, &
+                      primal_normal_cell_y, &
+                      dual_normal_cell_x, &
+                      dual_normal_cell_y, &
+                      rc)
 
    print *, "Python exit code = ", rc
    if (rc /= 0) then
        call exit(1)
    end if
 
-   do n = 1, 60
+   ! Initial run, executed once before profiling is enabled
+   call diffusion_run(w, vn, exner, theta_v, rho, hdef_ic, div_ic, dwdx, dwdy, dtime, linit, rc)
+   print *, "Initial diffusion run done"
+
    ! Call diffusion_run
    call profile_enable(rc)
    call diffusion_run(w, vn, exner, theta_v, rho, hdef_ic, div_ic, dwdx, dwdy, dtime, linit, rc)
    call profile_disable(rc)
-   end do
 
    print *, "Python exit code = ", rc
    if (rc /= 0) then