Skip to content

Commit

Permalink
Add Nh to type parameter space
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Jul 22, 2024
1 parent ef82fe7 commit 0197f76
Show file tree
Hide file tree
Showing 27 changed files with 538 additions and 473 deletions.
27 changes: 12 additions & 15 deletions ext/cuda/data_layouts_copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ function knl_copyto!(dest, src)
end

function Base.copyto!(
dest::IJFH{S, Nij},
bc::DataLayouts.BroadcastedUnionIJFH{S, Nij},
dest::IJFH{S, Nij, Nh},
bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh},
::ToCUDA,
) where {S, Nij}
_, _, _, _, Nh = size(bc)
) where {S, Nij, Nh}
if Nh > 0
auto_launch!(
knl_copyto!,
Expand All @@ -34,11 +33,10 @@ function Base.copyto!(
end

function Base.copyto!(
dest::VIJFH{S, Nv, Nij},
bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij},
dest::VIJFH{S, Nv, Nij, Nh},
bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh},
::ToCUDA,
) where {S, Nv, Nij}
_, _, _, _, Nh = size(bc)
) where {S, Nv, Nij, Nh}
if Nv > 0 && Nh > 0
Nv_per_block = min(Nv, fld(256, Nij * Nij))
Nv_blocks = cld(Nv, Nv_per_block)
Expand All @@ -58,14 +56,13 @@ function Base.copyto!(
bc::DataLayouts.BroadcastedUnionVF{S, Nv},
::ToCUDA,
) where {S, Nv}
_, _, _, _, Nh = size(dest)
if Nv > 0 && Nh > 0
if Nv > 0
auto_launch!(
knl_copyto!,
(dest, bc),
dest;
threads_s = (1, 1),
blocks_s = (Nh, Nv),
blocks_s = (1, Nv),
)
end
return dest
Expand Down Expand Up @@ -100,8 +97,8 @@ function knl_copyto_flat!(dest::AbstractData, bc)
end

function cuda_copyto!(dest::AbstractData, bc)
(_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0 && Nf > 0
(_, _, _, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0
auto_launch!(knl_copyto_flat!, (dest, bc), dest; auto = true)
end
return dest
Expand All @@ -111,10 +108,10 @@ end
# Currently, it seems to have a slight performance degradation.
#! format: off
# Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IFH{S, Ni}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
# Route IFH broadcasts on CUDA through the flat launcher `cuda_copyto!`.
# `Nh` appears as a type parameter of `bc`, so it must be bound in the `where` clause;
# an unbound name in a method signature is looked up as a global at definition time.
Base.copyto!(dest::IFH{S, Ni}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni, Nh}, ::ToCUDA) where {S, Ni, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IJF{S, Nij}, bc::DataLayouts.BroadcastedUnionIJF{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IF{S, Ni}, bc::DataLayouts.BroadcastedUnionIF{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIFH{S, Nv, Ni}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni}, ::ToCUDA) where {S, Nv, Ni} = cuda_copyto!(dest, bc)
# Route VIFH broadcasts on CUDA through the flat launcher `cuda_copyto!`.
# `Nh` appears as a type parameter of `bc`, so it must be bound in the `where` clause;
# an unbound name in a method signature is looked up as a global at definition time.
Base.copyto!(dest::VIFH{S, Nv, Ni}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni, Nh}, ::ToCUDA) where {S, Nv, Ni, Nh} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::VIJFH{S, Nv, Nij}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij}, ::ToCUDA) where {S, Nv, Nij} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
# Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
Expand Down
4 changes: 2 additions & 2 deletions ext/cuda/data_layouts_fill.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ function knl_fill_flat!(dest::AbstractData, val)
end

function cuda_fill!(dest::AbstractData, val)
(_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0 && Nf > 0
(_, _, _, Nv, Nh) = DataLayouts.universal_size(dest)
if Nv > 0 && Nh > 0
auto_launch!(knl_fill_flat!, (dest, val), dest; auto = true)
end
return dest
Expand Down
5 changes: 2 additions & 3 deletions ext/cuda/data_layouts_fused_copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,9 @@ end

function fused_copyto!(
fmbc::FusedMultiBroadcast,
dest1::VIJFH{S, Nv, Nij},
dest1::VIJFH{S, Nv, Nij, Nh},
::ToCUDA,
) where {S, Nv, Nij}
_, _, _, _, Nh = size(dest1)
) where {S, Nv, Nij, Nh}
if Nv > 0 && Nh > 0
Nv_per_block = min(Nv, fld(256, Nij * Nij))
Nv_blocks = cld(Nv, Nv_per_block)
Expand Down
2 changes: 1 addition & 1 deletion lib/ClimaCorePlots/src/ClimaCorePlots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ function _unfolded_pannel_matrix(field, interpolate)
# TODO: inefficient memory wise, but good enough for now
panels = [fill(NaN, (panel_size * dof, panel_size * dof)) for _ in 1:6]

interpolated_data = DataLayouts.IJFH{FT, interpolate}(Array{FT}, nelem)
interpolated_data = DataLayouts.IJFH{FT, interpolate, nelem}(Array{FT})
field_data = Fields.field_values(field)

Operators.tensor_product!(interpolated_data, field_data, Imat)
Expand Down
Loading

0 comments on commit 0197f76

Please sign in to comment.