Skip to content

Commit

Permalink
CPU <-> GPU covariant derivatives are now identical (towards tensor ops)
Browse files Browse the repository at this point in the history
	modified:   ext/cuda/operators_sem_shmem.jl
	modified:   ext/cuda/operators_spectral_element.jl
  • Loading branch information
Akshay Sridhar committed May 20, 2024
1 parent db8780f commit 3dc35be
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 14 deletions.
39 changes: 34 additions & 5 deletions ext/cuda/operators_sem_shmem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,23 +94,52 @@ Base.@propagate_inbounds function operator_shmem(
FT = Spaces.undertype(space)
QS = Spaces.quadrature_style(space)
Nq = Quadratures.degrees_of_freedom(QS)
# allocate temp output
IT = eltype(arg)
input = CUDA.CuStaticSharedArray(IT, (Nq, Nq, Nvt))
return (input,)
ET = eltype(IT)
RT = operator_return_eltype(op, IT)
if RT <: Geometry.Covariant12Vector
# allocate temp output
input = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
return (input,)
elseif IT <: Geometry.UVVector
# input data is a UVVector field
v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
return (v₁, v₂)
elseif IT <: Geometry.UVWVector
# input data is a UVWVector field
v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt))
return (v₁, v₂, v₃)
end
end

Base.@propagate_inbounds function operator_fill_shmem!(
op::Gradient{(1, 2)},
(input,),
input,
space,
ij,
slabidx,
arg,
)
vt = threadIdx().z
i, j = ij.I
input[i, j, vt] = arg
local_geometry = get_local_geometry(space, ij, slabidx)
RT = operator_return_eltype(op, typeof(arg))
if RT <: Geometry.Covariant12Vector
(v,) = input
v[i, j, vt] = arg
elseif typeof(arg) <: Geometry.UVVector
v₁, v₂ = input
v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u
v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v
elseif typeof(arg) <: Geometry.UVWVector
v₁, v₂, v₃ = input
v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u
v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v
v₃[i, j, vt] = Geometry.LocalVector(arg, local_geometry).w
end
end


Expand Down
54 changes: 45 additions & 9 deletions ext/cuda/operators_spectral_element.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import ClimaCore: Spaces, Quadratures, Topologies
import ClimaCore: Operators, Geometry, Quadratures, RecursiveApply
import ClimaComms
using CUDA: @cuda
using CUDA
import ClimaCore.Operators: AbstractSpectralStyle, strip_space
import ClimaCore.Operators: SpectralBroadcasted, set_node!, get_node
Expand Down Expand Up @@ -256,7 +255,7 @@ end

Base.@propagate_inbounds function operator_evaluate(
op::Gradient{(1, 2)},
(input,),
input,
space,
ij,
slabidx,
Expand All @@ -269,14 +268,51 @@ Base.@propagate_inbounds function operator_evaluate(
Nq = Quadratures.degrees_of_freedom(QS)
D = Quadratures.differentiation_matrix(FT, QS)

∂f∂ξ₁ = D[i, 1] * input[1, j, vt]
∂f∂ξ₂ = D[j, 1] * input[i, 1, vt]

for k in 2:Nq
∂f∂ξ₁ += D[i, k] * input[k, j, vt]
∂f∂ξ₂ += D[j, k] * input[i, k, vt]
if length(input) == 1 # check types
(v₁,) = input
∂f∂ξ₁ = D[i, 1] v₁[1, j, vt]
∂f∂ξ₂ = D[j, 1] v₁[i, 1, vt]
for k in 2:Nq
∂f∂ξ₁ = ∂f∂ξ₁ D[i, k] v₁[k, j, vt]
∂f∂ξ₂ = ∂f∂ξ₂ D[j, k] v₁[i, k, vt]
end
return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂)
elseif length(input) == 2
# Update `shmem`
v₁, v₂ = input
∂f₁∂ξ₁ = D[i, 1] v₁[1, j, vt]
∂f₁∂ξ₂ = D[j, 1] v₁[i, 1, vt]
∂f₂∂ξ₁ = D[i, 1] v₂[1, j, vt]
∂f₂∂ξ₂ = D[j, 1] v₂[i, 1, vt]
@simd for k in 2:Nq
∂f₁∂ξ₁ = ∂f₁∂ξ₁ D[i, k] v₁[k, j, vt]
∂f₁∂ξ₂ = ∂f₁∂ξ₂ D[j, k] v₁[i, k, vt]
∂f₂∂ξ₁ = ∂f₂∂ξ₁ D[i, k] v₂[k, j, vt]
∂f₂∂ξ₂ = ∂f₂∂ξ₂ D[j, k] v₂[i, k, vt]
end
return Geometry.AxisTensor((Geometry.Covariant12Axis(), Geometry.UVAxis()),
(∂f₁∂ξ₁, ∂f₂∂ξ₁,
∂f₁∂ξ₂, ∂f₂∂ξ₂))
else
v₁, v₂, v₃ = input
∂f₁∂ξ₁ = D[i, 1] v₁[1, j, vt]
∂f₁∂ξ₂ = D[j, 1] v₁[i, 1, vt]
∂f₂∂ξ₁ = D[i, 1] v₂[1, j, vt]
∂f₂∂ξ₂ = D[j, 1] v₂[i, 1, vt]
∂f₃∂ξ₁ = D[i, 1] v₃[1, j, vt]
∂f₃∂ξ₂ = D[j, 1] v₃[i, 1, vt]
@simd for k in 2:Nq
∂f₁∂ξ₁ = ∂f₁∂ξ₁ D[i, k] v₁[k, j, vt]
∂f₁∂ξ₂ = ∂f₁∂ξ₂ D[j, k] v₁[i, k, vt]
∂f₂∂ξ₁ = ∂f₂∂ξ₁ D[i, k] v₂[k, j, vt]
∂f₂∂ξ₂ = ∂f₂∂ξ₂ D[j, k] v₂[i, k, vt]
∂f₃∂ξ₁ = ∂f₃∂ξ₁ D[i, k] v₃[k, j, vt]
∂f₃∂ξ₂ = ∂f₃∂ξ₂ D[j, k] v₃[i, k, vt]
end
return Geometry.AxisTensor((Geometry.Covariant12Axis(), Geometry.UVWAxis()),
(∂f₁∂ξ₁, ∂f₂∂ξ₁, ∂f₃∂ξ₁,
∂f₁∂ξ₂, ∂f₂∂ξ₂, ∂f₃∂ξ₂))
end
return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂)
end

Base.@propagate_inbounds function operator_evaluate(
Expand Down

0 comments on commit 3dc35be

Please sign in to comment.