Skip to content

Commit

Permalink
Add Nh to type parameter space
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Jul 23, 2024
1 parent 8c8b85a commit 32ec1fd
Show file tree
Hide file tree
Showing 29 changed files with 546 additions and 487 deletions.
16 changes: 8 additions & 8 deletions examples/hybrid/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ walltime = @elapsed sol = OrdinaryDiffEq.solve!(integrator)
any(isnan, sol.u[end]) && error("NaNs found in result.")

if is_distributed # replace sol.u on the root processor with the global sol.u
global_Y_c_1 =
DataLayouts.gather(comms_ctx, Fields.field_values(sol.u[1].c))
global_Y_f_1 =
DataLayouts.gather(comms_ctx, Fields.field_values(sol.u[1].f))
if ClimaComms.iamroot(comms_ctx)
global_h_space = make_horizontal_space(
horizontal_mesh,
Expand All @@ -194,14 +198,10 @@ if is_distributed # replace sol.u on the root processor with the global sol.u
)
global_center_space, global_face_space =
make_hybrid_spaces(global_h_space, z_max, z_elem; z_stretch)
global_Y_c_type = Fields.Field{
typeof(Fields.field_values(Y.c)),
typeof(global_center_space),
}
global_Y_f_type = Fields.Field{
typeof(Fields.field_values(Y.f)),
typeof(global_face_space),
}
global_Y_c_type =
Fields.Field{typeof(global_Y_c_1), typeof(global_center_space)}
global_Y_f_type =
Fields.Field{typeof(global_Y_f_1), typeof(global_face_space)}
global_Y_type = Fields.FieldVector{
FT,
NamedTuple{(:c, :f), Tuple{global_Y_c_type, global_Y_f_type}},
Expand Down
39 changes: 18 additions & 21 deletions ext/cuda/data_layouts_copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ function knl_copyto!(dest, src)
end

function Base.copyto!(
dest::IJFH{S, Nij},
bc::DataLayouts.BroadcastedUnionIJFH{S, Nij},
dest::IJFH{S, Nij, Nh},
bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh},
::ToCUDA,
) where {S, Nij}
_, _, _, _, Nh = size(bc)
) where {S, Nij, Nh}
if Nh > 0
auto_launch!(
knl_copyto!,
Expand All @@ -34,11 +33,10 @@ function Base.copyto!(
end

function Base.copyto!(
dest::VIJFH{S, Nv, Nij},
bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij},
dest::VIJFH{S, Nv, Nij, Nh},
bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh},
::ToCUDA,
) where {S, Nv, Nij}
_, _, _, _, Nh = size(bc)
) where {S, Nv, Nij, Nh}
if Nv > 0 && Nh > 0
Nv_per_block = min(Nv, fld(256, Nij * Nij))
Nv_blocks = cld(Nv, Nv_per_block)
Expand All @@ -58,14 +56,13 @@ function Base.copyto!(
bc::DataLayouts.BroadcastedUnionVF{S, Nv},
::ToCUDA,
) where {S, Nv}
_, _, _, _, Nh = size(dest)
if Nv > 0 && Nh > 0
if Nv > 0
auto_launch!(
knl_copyto!,
(dest, bc),
dest;
threads_s = (1, 1),
blocks_s = (Nh, Nv),
blocks_s = (1, Nv),
)
end
return dest
Expand Down Expand Up @@ -100,8 +97,8 @@ function knl_copyto_flat!(dest::AbstractData, bc)
end

function cuda_copyto!(dest::AbstractData, bc)
(_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0 && Nf > 0
(_, _, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0
auto_launch!(knl_copyto_flat!, (dest, bc), dest; auto = true)
end
return dest
Expand All @@ -110,12 +107,12 @@ end
# TODO: can we use CUDA's launch configuration for all data layouts?
# Currently, it seems to have a slight performance degradation.
#! format: off
# Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IFH{S, Ni}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IJF{S, Nij}, bc::DataLayouts.BroadcastedUnionIJF{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IF{S, Ni}, bc::DataLayouts.BroadcastedUnionIF{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIFH{S, Nv, Ni}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni}, ::ToCUDA) where {S, Nv, Ni} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::VIJFH{S, Nv, Nij}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij}, ::ToCUDA) where {S, Nv, Nij} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::IJFH{S, Nij, Nh}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh}, ::ToCUDA) where {S, Nij, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IFH{S, Ni, Nh}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni, Nh}, ::ToCUDA) where {S, Ni, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IJF{S, Nij}, bc::DataLayouts.BroadcastedUnionIJF{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IF{S, Ni}, bc::DataLayouts.BroadcastedUnionIF{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIFH{S, Nv, Ni, Nh}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni, Nh}, ::ToCUDA) where {S, Nv, Ni, Nh} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::VIJFH{S, Nv, Nij, Nh}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh}, ::ToCUDA) where {S, Nv, Nij, Nh} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
#! format: on
4 changes: 2 additions & 2 deletions ext/cuda/data_layouts_fill.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ function knl_fill_flat!(dest::AbstractData, val)
end

function cuda_fill!(dest::AbstractData, val)
(_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0 && Nf > 0
(_, _, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0
auto_launch!(knl_fill_flat!, (dest, val), dest; auto = true)
end
return dest
Expand Down
5 changes: 2 additions & 3 deletions ext/cuda/data_layouts_fused_copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,9 @@ end

function fused_copyto!(
fmbc::FusedMultiBroadcast,
dest1::VIJFH{S, Nv, Nij},
dest1::VIJFH{S, Nv, Nij, Nh},
::ToCUDA,
) where {S, Nv, Nij}
_, _, _, _, Nh = size(dest1)
) where {S, Nv, Nij, Nh}
if Nv > 0 && Nh > 0
Nv_per_block = min(Nv, fld(256, Nij * Nij))
Nv_blocks = cld(Nv, Nv_per_block)
Expand Down
2 changes: 1 addition & 1 deletion lib/ClimaCorePlots/src/ClimaCorePlots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ function _unfolded_pannel_matrix(field, interpolate)
# TODO: inefficient memory-wise, but good enough for now
panels = [fill(NaN, (panel_size * dof, panel_size * dof)) for _ in 1:6]

interpolated_data = DataLayouts.IJFH{FT, interpolate}(Array{FT}, nelem)
interpolated_data = DataLayouts.IJFH{FT, interpolate, nelem}(Array{FT})
field_data = Fields.field_values(field)

Operators.tensor_product!(interpolated_data, field_data, Imat)
Expand Down
Loading

0 comments on commit 32ec1fd

Please sign in to comment.