Skip to content

Commit

Permalink
CPU <-> GPU covariant derivatives are now identical (towards tensor ops)
Browse files Browse the repository at this point in the history
Adds unit tests (known trig solutions) for both cpu and gpu devices
which compare parent array outcomes
	modified:   ext/cuda/operators_sem_shmem.jl
	modified:   ext/cuda/operators_spectral_element.jl
	modified:   .buildkite/pipeline.yml
	modified:   test/runtests.jl

Relaxes test_broken on extruded CUDA operator tests (2 instances)
Add 2 additional operator tests
	modified:   test/Operators/hybrid/extruded_3dbox_cuda.jl
	modified:   test/Operators/hybrid/extruded_sphere_cuda.jl

Skip bounds check
	modified:   ext/cuda/operators_spectral_element.jl
  • Loading branch information
Akshay Sridhar authored and akshaysridhar committed Aug 8, 2024
1 parent bf4d141 commit 8be268a
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 18 deletions.
11 changes: 11 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,17 @@ steps:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1

- label: "Unit: velocity grad tensor ops"
key: unit_spectral_tensor_op_cuda
command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/spectralelement/covar_deriv_ops.jl"
command:
- "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'"
- "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/spectralelement/covar_deriv_ops.jl CUDA"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1

- label: "Unit: hybrid operators cuda"
key: unit_ops_cuda
Expand Down
62 changes: 57 additions & 5 deletions ext/cuda/operators_sem_shmem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,23 +94,75 @@ Base.@propagate_inbounds function operator_shmem(
FT = Spaces.undertype(space)
QS = Spaces.quadrature_style(space)
Nq = Quadratures.degrees_of_freedom(QS)
# allocate temp output
IT = eltype(arg)
input = CUDA.CuStaticSharedArray(IT, (Nq, Nq, Nvt))
return (input,)
ET = eltype(IT)
RT = operator_return_eltype(op, IT)
RT12 = Geometry.AxisTensor{
ET,
2,
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2)}},
SMatrix{2, 2, ET, 4},
}
RT123 = Geometry.AxisTensor{
ET,
2,
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2, 3)}},
SMatrix{2, 3, ET, 6},
}
if RT <: Geometry.Covariant12Vector
# allocate temp output
input = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
return (input,)
elseif RT <: RT12
v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
return (v₁, v₂)
elseif RT <: RT123
v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
return (v₁, v₂, v₃)
end
end

Base.@propagate_inbounds function operator_fill_shmem!(
op::Gradient{(1, 2)},
(input,),
input,
space,
ij,
slabidx,
arg,
)
vt = threadIdx().z
i, j = ij.I
input[i, j, vt] = arg
local_geometry = get_local_geometry(space, ij, slabidx)
RT = operator_return_eltype(op, typeof(arg))
ET = eltype(eltype(arg))
RT12 = Geometry.AxisTensor{
ET,
2,
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2)}},
SMatrix{2, 2, ET, 4},
}
RT123 = Geometry.AxisTensor{
ET,
2,
Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2, 3)}},
SMatrix{2, 3, ET, 6},
}
if RT <: Geometry.Covariant12Vector
(v,) = input
v[i, j, vt] = arg
elseif RT <: RT12
v₁, v₂ = input
v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u
v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v
elseif RT <: RT123
v₁, v₂, v₃ = input
v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u
v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v
v₃[i, j, vt] = Geometry.LocalVector(arg, local_geometry).w
end
end


Expand Down
62 changes: 53 additions & 9 deletions ext/cuda/operators_spectral_element.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import ClimaCore: Spaces, Quadratures, Topologies
import ClimaCore: Operators, Geometry, Quadratures, RecursiveApply
import ClimaComms
using CUDA: @cuda
using CUDA
import ClimaCore.Operators: AbstractSpectralStyle, strip_space
import ClimaCore.Operators: SpectralBroadcasted, set_node!, get_node
Expand Down Expand Up @@ -256,7 +255,7 @@ end

Base.@propagate_inbounds function operator_evaluate(
op::Gradient{(1, 2)},
(input,),
input,
space,
ij,
slabidx,
Expand All @@ -269,14 +268,59 @@ Base.@propagate_inbounds function operator_evaluate(
Nq = Quadratures.degrees_of_freedom(QS)
D = Quadratures.differentiation_matrix(FT, QS)

∂f∂ξ₁ = D[i, 1] * input[1, j, vt]
∂f∂ξ₂ = D[j, 1] * input[i, 1, vt]

for k in 2:Nq
∂f∂ξ₁ += D[i, k] * input[k, j, vt]
∂f∂ξ₂ += D[j, k] * input[i, k, vt]
if length(input) == 1 # check types
(v₁,) = input
@inbounds begin
∂f∂ξ₁ = D[i, 1] v₁[1, j, vt]
∂f∂ξ₂ = D[j, 1] v₁[i, 1, vt]
for k in 2:Nq
∂f∂ξ₁ = ∂f∂ξ₁ D[i, k] v₁[k, j, vt]
∂f∂ξ₂ = ∂f∂ξ₂ D[j, k] v₁[i, k, vt]
end
end
return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂)
elseif length(input) == 2
# Update `shmem`
v₁, v₂ = input
@inbounds begin
∂f₁∂ξ₁ = D[i, 1] v₁[1, j, vt]
∂f₁∂ξ₂ = D[j, 1] v₁[i, 1, vt]
∂f₂∂ξ₁ = D[i, 1] v₂[1, j, vt]
∂f₂∂ξ₂ = D[j, 1] v₂[i, 1, vt]
@simd for k in 2:Nq
∂f₁∂ξ₁ = ∂f₁∂ξ₁ D[i, k] v₁[k, j, vt]
∂f₁∂ξ₂ = ∂f₁∂ξ₂ D[j, k] v₁[i, k, vt]
∂f₂∂ξ₁ = ∂f₂∂ξ₁ D[i, k] v₂[k, j, vt]
∂f₂∂ξ₂ = ∂f₂∂ξ₂ D[j, k] v₂[i, k, vt]
end
end
return Geometry.AxisTensor(
(Geometry.Covariant12Axis(), Geometry.UVAxis()),
(∂f₁∂ξ₁, ∂f₁∂ξ₂, ∂f₂∂ξ₁, ∂f₂∂ξ₂),
)
else
v₁, v₂, v₃ = input
@inbounds begin
∂f₁∂ξ₁ = D[i, 1] v₁[1, j, vt]
∂f₁∂ξ₂ = D[j, 1] v₁[i, 1, vt]
∂f₂∂ξ₁ = D[i, 1] v₂[1, j, vt]
∂f₂∂ξ₂ = D[j, 1] v₂[i, 1, vt]
∂f₃∂ξ₁ = D[i, 1] v₃[1, j, vt]
∂f₃∂ξ₂ = D[j, 1] v₃[i, 1, vt]
@simd for k in 2:Nq
∂f₁∂ξ₁ = ∂f₁∂ξ₁ D[i, k] v₁[k, j, vt]
∂f₁∂ξ₂ = ∂f₁∂ξ₂ D[j, k] v₁[i, k, vt]
∂f₂∂ξ₁ = ∂f₂∂ξ₁ D[i, k] v₂[k, j, vt]
∂f₂∂ξ₂ = ∂f₂∂ξ₂ D[j, k] v₂[i, k, vt]
∂f₃∂ξ₁ = ∂f₃∂ξ₁ D[i, k] v₃[k, j, vt]
∂f₃∂ξ₂ = ∂f₃∂ξ₂ D[j, k] v₃[i, k, vt]
end
end
return Geometry.AxisTensor(
(Geometry.Covariant12Axis(), Geometry.UVWAxis()),
(∂f₁∂ξ₁, ∂f₁∂ξ₂, ∂f₂∂ξ₁, ∂f₂∂ξ₂, ∂f₃∂ξ₁, ∂f₃∂ξ₂),
)
end
return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂)
end

Base.@propagate_inbounds function operator_evaluate(
Expand Down
4 changes: 2 additions & 2 deletions test/Operators/hybrid/extruded_3dbox_cuda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ end

# Test wdiv(grad()) composed Laplace operator
grad = Operators.Gradient()
@test parent(grad.(g_cpu)) Array(parent(grad.(g_gpu)))
@test parent(wdiv.(grad.(f_cpu))) Array(parent(wdiv.(grad.(f_gpu))))
@test_broken parent(wdiv.(grad.(x_cpu)))
Array(parent(wdiv.(grad.(x_gpu))))
@test parent(wdiv.(grad.(x_cpu))) Array(parent(wdiv.(grad.(x_gpu))))

# Test DSS
@test parent(Spaces.weighted_dss!(wdiv.(grad.(f_cpu))))
Expand Down
4 changes: 2 additions & 2 deletions test/Operators/hybrid/extruded_sphere_cuda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ end

# Test wdiv(grad()) composed Laplace operator
grad = Operators.Gradient()
@test parent(grad.(g_cpu)) Array(parent(grad.(g_gpu)))
@test parent(wdiv.(grad.(f_cpu))) Array(parent(wdiv.(grad.(f_gpu))))
@test_broken parent(wdiv.(grad.(x_cpu)))
Array(parent(wdiv.(grad.(x_gpu))))
@test parent(wdiv.(grad.(x_cpu))) Array(parent(wdiv.(grad.(x_gpu))))

# Test DSS
@test parent(Spaces.weighted_dss!(wdiv.(grad.(f_cpu))))
Expand Down
107 changes: 107 additions & 0 deletions test/Operators/spectralelement/covar_deriv_ops.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using Logging
using Test

import ClimaCore:
Domains,
Fields,
Geometry,
Meshes,
Operators,
Spaces,
Quadratures,
Topologies,
DataLayouts,
Grids

using ClimaComms
ClimaComms.@import_required_backends
device = ClimaComms.device()
using ClimaComms
using IntervalSets

FT = Float64
xlim = (FT(0), FT(2π))
zlim = (FT(0), FT(1))
helem = 5
velem = 5
npoly = 5
ndims = 3
stretch = Meshes.Uniform()
comms_context = ClimaComms.SingletonCommsContext(device)
FT = eltype(xlim)

# Horizontal Grid Construction
quad = Quadratures.GLL{npoly + 1}()
horzdomain = Domains.RectangleDomain(
Geometry.XPoint{FT}(xlim[1]) .. Geometry.XPoint{FT}(xlim[2]),
Geometry.YPoint{FT}(xlim[1]) .. Geometry.YPoint{FT}(xlim[2]),
x1periodic = true,
x2periodic = true,
)
# Assume same number of elems (helem) in (x,y) directions
horzmesh = Meshes.RectilinearMesh(horzdomain, helem, helem)
horz_topology = Topologies.Topology2D(
comms_context,
horzmesh,
Topologies.spacefillingcurve(horzmesh),
);
h_space =
Spaces.SpectralElementSpace2D(horz_topology, quad, enable_bubble = true);
horz_grid = Spaces.grid(h_space)

# Vertical Grid Construction
vertdomain = Domains.IntervalDomain(
Geometry.ZPoint{FT}(zlim[1]),
Geometry.ZPoint{FT}(zlim[2]);
boundary_names = (:bottom, :top),
)
vertmesh = Meshes.IntervalMesh(vertdomain, stretch, nelems = velem)
vert_face_space = Spaces.FaceFiniteDifferenceSpace(vertmesh)
vert_topology = Topologies.IntervalTopology(
ClimaComms.SingletonCommsContext(device),
vertmesh,
)
vert_grid = Grids.FiniteDifferenceGrid(vert_topology)

grid = Grids.ExtrudedFiniteDifferenceGrid(horz_grid, vert_grid)
cent_space = Spaces.CenterExtrudedFiniteDifferenceSpace(grid)
face_space = Spaces.FaceExtrudedFiniteDifferenceSpace(grid)
ccoords = Fields.coordinate_field(cent_space)
fcoords = Fields.coordinate_field(face_space)

= Operators.Gradient();

η = @. sin(ccoords.x) + cos(ccoords.y)
η_test1 = @. Geometry.project(Geometry.UVAxis(), (η)).components.data.:1
η_test2 = @. Geometry.project(Geometry.UVAxis(), (η)).components.data.:2
Spaces.weighted_dss!(η_test1)
Spaces.weighted_dss!(η_test2)

𝒻₁ = @. Geometry.UVVector(η, 2η)
𝒻₂ = @. Geometry.UVWVector(η, 2η, 3η)

∇η = @. (η)
∇𝒻₁ = @. Geometry.project(Geometry.UVAxis(), (𝒻₁))
for ii in 1:4
Spaces.weighted_dss!(∇𝒻₁.components.data.:($ii))
end

# Check against known solution component-wise
device isa ClimaComms.CUDADevice ? CUDA.allowscalar(true) : nothing
@test parent(η_test1) parent(∇𝒻₁.components.data.:1)
@test parent(η_test2) parent(∇𝒻₁.components.data.:2)
@test parent(2 .* η_test1) parent(∇𝒻₁.components.data.:3)
@test parent(2 .* η_test2) parent(∇𝒻₁.components.data.:4)

∇𝒻₂ = @. Geometry.project(Geometry.UVAxis(), (𝒻₂))
for ii in 1:6
Spaces.weighted_dss!(∇𝒻₂.components.data.:($ii))
end

# Check against known solution component-wise
@test parent(η_test1) parent(∇𝒻₂.components.data.:1)
@test parent(η_test2) parent(∇𝒻₂.components.data.:2)
@test parent(2 .* η_test1) parent(∇𝒻₂.components.data.:3)
@test parent(2 .* η_test2) parent(∇𝒻₂.components.data.:4)
@test parent(3 .* η_test1) parent(∇𝒻₂.components.data.:5)
@test parent(3 .* η_test2) parent(∇𝒻₂.components.data.:6)
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ UnitTest("Sphere spaces" ,"Spaces/sphere.jl"),
UnitTest("Fields" ,"Fields/unit_field.jl"), # has benchmarks
UnitTest("Spectral elem - rectilinear" ,"Operators/spectralelement/rectilinear.jl"),
UnitTest("Spectral elem - opt" ,"Operators/spectralelement/opt.jl"),
UnitTest("Spectral elem - gradient tensor" ,"Operators/spectralelement/covar_deriv_ops.jl"),
UnitTest("Spectral elem - Diffusion 2d" ,"Operators/spectralelement/unit_diffusion2d.jl"),
UnitTest("Spectral elem - sphere geometry" ,"Operators/spectralelement/sphere_geometry.jl"),
UnitTest("Spectral elem - sphere gradient" ,"Operators/spectralelement/sphere_gradient.jl"),
Expand Down

0 comments on commit 8be268a

Please sign in to comment.