Skip to content

Commit

Permalink
Make Nh a dynamic parameter
Browse files: browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Sep 25, 2024
1 parent 65d0e30 commit 1c42093
Show file tree
Hide file tree
Showing 34 changed files with 617 additions and 503 deletions.
8 changes: 4 additions & 4 deletions ext/cuda/data_layouts_copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ function cuda_copyto!(dest::AbstractData, bc)
end

#! format: off
Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh}, ::ToCUDA) where {S, Nij, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IFH{S, Ni, Nh}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni, Nh}, ::ToCUDA) where {S, Ni, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IFH{S, Ni}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IJF{S, Nij}, bc::DataLayouts.BroadcastedUnionIJF{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::IF{S, Ni}, bc::DataLayouts.BroadcastedUnionIF{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIFH{S, Nv, Ni, Nh}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni, Nh}, ::ToCUDA) where {S, Nv, Ni, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIJFH{S, Nv, Nij, Nh}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh}, ::ToCUDA) where {S, Nv, Nij, Nh} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIFH{S, Nv, Ni}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni}, ::ToCUDA) where {S, Nv, Ni} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VIJFH{S, Nv, Nij}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij}, ::ToCUDA) where {S, Nv, Nij} = cuda_copyto!(dest, bc)
Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
#! format: on
7 changes: 4 additions & 3 deletions ext/cuda/matrix_fields_multiple_field_solve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ NVTX.@annotate function multiple_field_solve!(

device = ClimaComms.device(x[first(names)])

args = (device, caches, xs, As, bs, x1, Val(Nnames))

us = UniversalSize(Fields.field_values(x1))

args = (device, caches, xs, As, bs, x1, us, Val(Nnames))

nitems = Ni * Nj * Nh * Nnames
threads = threads_via_occupancy(multiple_field_solve_kernel!, args)
n_max_threads = min(threads, nitems)
Expand Down Expand Up @@ -85,10 +86,10 @@ function multiple_field_solve_kernel!(
As,
bs,
x1,
us,
::Val{Nnames},
) where {Nnames}
@inbounds begin
us = UniversalSize(Fields.field_values(x1))
(I, iname) = multiple_field_solve_universal_index()
if multiple_field_solve_is_valid_index(I, us)
(i, j, _, _, h) = I.I
Expand Down
6 changes: 4 additions & 2 deletions ext/cuda/operators_integral.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ function column_reduce_device!(
space,
) where {F, T}
Ni, Nj, _, _, Nh = size(Fields.field_values(output))
us = UniversalSize(Fields.field_values(output))
args = (
single_column_reduce!,
f,
Expand All @@ -27,8 +28,8 @@ function column_reduce_device!(
strip_space(input, space),
init,
space,
us,
)
us = UniversalSize(Fields.field_values(output))
nitems = Ni * Nj * Nh
threads = threads_via_occupancy(bycolumn_kernel!, args)
n_max_threads = min(threads, nitems)
Expand Down Expand Up @@ -59,6 +60,7 @@ function column_accumulate_device!(
strip_space(input, space),
init,
space,
us,
)
Ni, Nj, _, _, Nh = size(Fields.field_values(output))
nitems = Ni * Nj * Nh
Expand All @@ -81,12 +83,12 @@ bycolumn_kernel!(
input,
init,
space,
us,
) where {S, F, T} =
if space isa Spaces.FiniteDifferenceSpace
single_column_function!(f, transform, output, input, init, space)
else
I = columnwise_universal_index()
us = UniversalSize(Fields.field_values(output))
if columnwise_is_valid_index(I, us)
(i, j, _, _, h) = I.I
single_column_function!(
Expand Down
4 changes: 2 additions & 2 deletions ext/cuda/operators_thomas_algorithm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import CUDA
using CUDA: @cuda
function column_thomas_solve!(::ClimaComms.CUDADevice, A, b)
us = UniversalSize(Fields.field_values(A))
args = (A, b)
args = (A, b, us)
Ni, Nj, _, _, Nh = size(Fields.field_values(A))
threads = threads_via_occupancy(thomas_algorithm_kernel!, args)
nitems = Ni * Nj * Nh
Expand All @@ -23,9 +23,9 @@ end
function thomas_algorithm_kernel!(
A::Fields.ExtrudedFiniteDifferenceField,
b::Fields.ExtrudedFiniteDifferenceField,
us::DataLayouts.UniversalSize,
)
I = columnwise_universal_index()
us = UniversalSize(Fields.field_values(A))
if columnwise_is_valid_index(I, us)
(i, j, _, _, h) = I.I
thomas_algorithm!(Spaces.column(A, i, j, h), Spaces.column(b, i, j, h))
Expand Down
2 changes: 1 addition & 1 deletion lib/ClimaCorePlots/src/ClimaCorePlots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ function _unfolded_pannel_matrix(field, interpolate)
# TODO: inefficient memory-wise, but good enough for now
panels = [fill(NaN, (panel_size * dof, panel_size * dof)) for _ in 1:6]

interpolated_data = DataLayouts.IJFH{FT, interpolate, nelem}(Array{FT})
interpolated_data = DataLayouts.IJFH{FT, interpolate}(Array{FT}, nelem)
field_data = Fields.field_values(field)

Operators.tensor_product!(interpolated_data, field_data, Imat)
Expand Down
Loading

0 comments on commit 1c42093

Please sign in to comment.