From d2627b1d0dcd6a3e73a554944fcf4e39e2447d0e Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Wed, 24 Jul 2024 18:10:53 -0700 Subject: [PATCH] Add column_accumulate and generalize column_reduce --- .buildkite/Manifest.toml | 4 +- benchmarks/bickleyjet/Manifest.toml | 19 +- docs/src/operators.md | 3 +- ext/cuda/operators_integral.jl | 181 +++---- lib/ClimaCoreMakie/examples/Manifest.toml | 19 +- src/Fields/fieldvector.jl | 5 + src/Fields/indices.jl | 38 +- src/Operators/common.jl | 5 + src/Operators/finitedifference.jl | 9 + src/Operators/integrals.jl | 570 +++++++++++----------- src/interface.jl | 2 + test/Operators/integrals.jl | 214 +++----- 12 files changed, 471 insertions(+), 598 deletions(-) diff --git a/.buildkite/Manifest.toml b/.buildkite/Manifest.toml index 8bcfa5f4d4..cc8bdb2735 100644 --- a/.buildkite/Manifest.toml +++ b/.buildkite/Manifest.toml @@ -295,9 +295,9 @@ weakdeps = ["SparseArrays"] ChainRulesCoreSparseArraysExt = "SparseArrays" [[deps.ClimaComms]] -git-tree-sha1 = "2ca8c9ca6131a7be8ca262e6db79bc7aa94ab597" +git-tree-sha1 = "ec303a4a66dc0a0ebe15a639a7e685afeaa0daef" uuid = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" -version = "0.6.3" +version = "0.6.4" weakdeps = ["CUDA", "MPI"] [deps.ClimaComms.extensions] diff --git a/benchmarks/bickleyjet/Manifest.toml b/benchmarks/bickleyjet/Manifest.toml index 180b3813e0..7fb62adb21 100644 --- a/benchmarks/bickleyjet/Manifest.toml +++ b/benchmarks/bickleyjet/Manifest.toml @@ -188,9 +188,9 @@ uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.18.0+2" [[deps.ClimaComms]] -git-tree-sha1 = "2ca8c9ca6131a7be8ca262e6db79bc7aa94ab597" +git-tree-sha1 = "ec303a4a66dc0a0ebe15a639a7e685afeaa0daef" uuid = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" -version = "0.6.3" +version = "0.6.4" [deps.ClimaComms.extensions] ClimaCommsCUDAExt = "CUDA" @@ -201,10 +201,10 @@ version = "0.6.3" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" [[deps.ClimaCore]] -deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] +deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "StaticArrays", "Statistics", "Unrolled"] path = "../.." uuid = "d414da3d-4745-48bb-8d80-42e94e092884" -version = "0.14.9" +version = "0.14.10" [deps.ClimaCore.extensions] ClimaCoreCUDAExt = "CUDA" @@ -586,11 +586,6 @@ git-tree-sha1 = "ca0f6bf568b4bfc807e7537f081c81e35ceca114" uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" version = "2.10.0+0" -[[deps.IfElse]] -git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" -uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -version = "0.1.1" - [[deps.InlineStrings]] deps = ["Parsers"] git-tree-sha1 = "86356004f30f8e737eff143d57d41bd580e437aa" @@ -1282,12 +1277,6 @@ version = "2.4.0" [deps.SpecialFunctions.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -[[deps.Static]] -deps = ["IfElse"] -git-tree-sha1 = "d2fdac9ff3906e27f7a618d47b676941baa6c80c" -uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.8.10" - [[deps.StaticArrays]] deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] git-tree-sha1 = "6e00379a24597be4ae1ee6b2d882e15392040132" diff --git a/docs/src/operators.md b/docs/src/operators.md index fc10835140..559c28aec7 100644 --- a/docs/src/operators.md +++ b/docs/src/operators.md @@ -102,7 +102,8 @@ Extrapolate ```@docs column_integral_definite! column_integral_indefinite! -column_mapreduce! +column_reduce! +column_accumulate! ``` ## Internal APIs diff --git a/ext/cuda/operators_integral.jl b/ext/cuda/operators_integral.jl index cdff4dd00f..651a010a47 100644 --- a/ext/cuda/operators_integral.jl +++ b/ext/cuda/operators_integral.jl @@ -1,126 +1,83 @@ -import ClimaCore: Spaces, Fields, Spaces, Topologies -import ClimaCore.Operators: strip_space +import ClimaCore: Spaces, Fields, level, column import ClimaCore.Operators: - column_integral_definite!, - column_integral_definite_kernel!, - column_integral_indefinite_kernel!, - column_integral_indefinite!, - column_mapreduce_device!, - _column_integral_definite!, - _column_integral_indefinite! - + left_idx, + strip_space, + column_reduce_device!, + single_column_reduce!, + column_accumulate_device!, + single_column_accumulate! import ClimaComms using CUDA: @cuda -function column_integral_definite!( +function column_reduce_device!( ::ClimaComms.CUDADevice, - ∫field::Fields.Field, - ᶜfield::Fields.Field, -) - space = axes(∫field) - Ni, Nj, _, _, Nh = size(Fields.field_values(∫field)) - nthreads, nblocks = _configure_threadblock(Ni * Nj * Nh) - args = (strip_space(∫field, space), strip_space(ᶜfield, space)) - auto_launch!( - column_integral_definite_kernel!, - args, - size(Fields.field_values(∫field)); - threads_s = nthreads, - blocks_s = nblocks, - ) -end - -function column_integral_definite_kernel!( - ∫field, - ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, -) - idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - Ni, Nj, _, _, Nh = size(Fields.field_values(ᶜfield)) - if idx <= Ni * Nj * Nh - i, j, h = cart_ind((Ni, Nj, Nh), idx).I - ∫field_column = Spaces.column(∫field, i, j, h) - ᶜfield_column = Spaces.column(ᶜfield, i, j, h) - _column_integral_definite!(∫field_column, ᶜfield_column) - end - return nothing -end - -function column_integral_indefinite_kernel!( - ᶠ∫field::Fields.FaceExtrudedFiniteDifferenceField, - ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, -) - idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - Ni, Nj, _, _, Nh = size(Fields.field_values(ᶜfield)) - if idx <= Ni * Nj * Nh - i, j, h = cart_ind((Ni, Nj, Nh), idx).I - ᶠ∫field_column = Spaces.column(ᶠ∫field, i, j, h) - ᶜfield_column = Spaces.column(ᶜfield, i, j, h) - _column_integral_indefinite!(ᶠ∫field_column, ᶜfield_column) - end - return nothing -end - -function column_integral_indefinite!( - ::ClimaComms.CUDADevice, - ᶠ∫field::Fields.Field, - ᶜfield::Fields.Field, -) - Ni, Nj, _, _, Nh = size(Fields.field_values(ᶠ∫field)) - nthreads, nblocks = _configure_threadblock(Ni * Nj * Nh) - args = (ᶠ∫field, ᶜfield) - auto_launch!( - column_integral_indefinite_kernel!, - args, - size(Fields.field_values(ᶠ∫field)); - threads_s = nthreads, - blocks_s = nblocks, + f::F, + transform::T, + output, + input, + init, + space, +) where {F, T} + Ni, Nj, _, _, Nh = size(Fields.field_values(output)) + threads_s, blocks_s = _configure_threadblock(Ni * Nj * Nh) + args = ( + single_column_reduce!, + f, + transform, + strip_space(output, axes(output)), # The output space is irrelevant here + strip_space(input, space), + init, + space, ) + auto_launch!(bycolumn_kernel!, args, (); threads_s, blocks_s) end -function column_mapreduce_device!( +function column_accumulate_device!( ::ClimaComms.CUDADevice, - fn::F, - op::O, - reduced_field::Fields.Field, - fields::Fields.Field..., -) where {F, O} - Ni, Nj, _, _, Nh = size(Fields.field_values(reduced_field)) - nthreads, nblocks = _configure_threadblock(Ni * Nj * Nh) - kernel! = if first(fields) isa Fields.ExtrudedFiniteDifferenceField - column_mapreduce_kernel_extruded! - else - column_mapreduce_kernel! - end + f::F, + transform::T, + output, + input, + init, + space, +) where {F, T} + Ni, Nj, _, _, Nh = size(Fields.field_values(output)) + threads_s, blocks_s = _configure_threadblock(Ni * Nj * Nh) args = ( - fn, - op, - # reduced_field, - strip_space(reduced_field, axes(reduced_field)), - # fields..., - map(field -> strip_space(field, axes(field)), fields)..., - ) - auto_launch!( - kernel!, - args, - size(Fields.field_values(reduced_field)); - threads_s = nthreads, - blocks_s = nblocks, + single_column_accumulate!, + f, + transform, + strip_space(output, space), + strip_space(input, space), + init, + space, ) + auto_launch!(bycolumn_kernel!, args, (); threads_s, blocks_s) end -function column_mapreduce_kernel_extruded!( - fn::F, - op::O, - reduced_field, - fields..., -) where {F, O} - idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - Ni, Nj, _, _, Nh = size(Fields.field_values(reduced_field)) - if idx <= Ni * Nj * Nh - i, j, h = cart_ind((Ni, Nj, Nh), idx).I - reduced_field_column = Spaces.column(reduced_field, i, j, h) - field_columns = map(field -> Spaces.column(field, i, j, h), fields) - _column_mapreduce!(fn, op, reduced_field_column, field_columns...) +bycolumn_kernel!( + single_column_function!::S, + f::F, + transform::T, + output, + input, + init, + space, +) where {S, F, T} = + if space isa Spaces.FiniteDifferenceSpace + single_column_function!(f, transform, output, input, init, space) + else + idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x + Ni, Nj, _, _, Nh = size(Fields.field_values(output)) + if idx <= Ni * Nj * Nh + i, j, h = cart_ind((Ni, Nj, Nh), idx).I + single_column_function!( + f, + transform, + column(output, i, j, h), + column(input, i, j, h), + init, + column(space, i, j, h), + ) + end end - return nothing -end diff --git a/lib/ClimaCoreMakie/examples/Manifest.toml b/lib/ClimaCoreMakie/examples/Manifest.toml index f29840e4e2..a47c23b9e5 100644 --- a/lib/ClimaCoreMakie/examples/Manifest.toml +++ b/lib/ClimaCoreMakie/examples/Manifest.toml @@ -205,9 +205,9 @@ weakdeps = ["SparseArrays"] ChainRulesCoreSparseArraysExt = "SparseArrays" [[deps.ClimaComms]] -git-tree-sha1 = "2ca8c9ca6131a7be8ca262e6db79bc7aa94ab597" +git-tree-sha1 = "ec303a4a66dc0a0ebe15a639a7e685afeaa0daef" uuid = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" -version = "0.6.3" +version = "0.6.4" [deps.ClimaComms.extensions] ClimaCommsCUDAExt = "CUDA" @@ -218,10 +218,10 @@ version = "0.6.3" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" [[deps.ClimaCore]] -deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] +deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "StaticArrays", "Statistics", "Unrolled"] path = "../../.." uuid = "d414da3d-4745-48bb-8d80-42e94e092884" -version = "0.14.9" +version = "0.14.10" [deps.ClimaCore.extensions] ClimaCoreCUDAExt = "CUDA" @@ -754,11 +754,6 @@ git-tree-sha1 = "47ac8cc196b81001a711f4b2c12c97372338f00c" uuid = "7073ff75-c697-5162-941a-fcdaad2a7d2a" version = "1.24.2" -[[deps.IfElse]] -git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" -uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -version = "0.1.1" - [[deps.ImageAxes]] deps = ["AxisArrays", "ImageBase", "ImageCore", "Reexport", "SimpleTraits"] git-tree-sha1 = "2e4520d67b0cef90865b3ef727594d2a58e0e1f8" @@ -1633,12 +1628,6 @@ git-tree-sha1 = "46e589465204cd0c08b4bd97385e4fa79a0c770c" uuid = "cae243ae-269e-4f55-b966-ac2d0dc13c15" version = "0.1.1" -[[deps.Static]] -deps = ["IfElse"] -git-tree-sha1 = "d2fdac9ff3906e27f7a618d47b676941baa6c80c" -uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.8.10" - [[deps.StaticArrays]] deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] git-tree-sha1 = "6e00379a24597be4ae1ee6b2d882e15392040132" diff --git a/src/Fields/fieldvector.jl b/src/Fields/fieldvector.jl index d9853e5a65..821e75adc8 100644 --- a/src/Fields/fieldvector.jl +++ b/src/Fields/fieldvector.jl @@ -136,6 +136,11 @@ Base.similar(fv::FieldVector{T}, ::Type{T′}) where {T, T′} = Base.copy(fv::FieldVector{T}) where {T} = FieldVector{T}(map(copy, _values(fv))) Base.zero(fv::FieldVector{T}) where {T} = FieldVector{T}(map(zero, _values(fv))) +Base.@propagate_inbounds slab(fv::FieldVector{T}, inds...) where {T} = + FieldVector{T}(slab_args(_values(fv), inds...)) +Base.@propagate_inbounds column(fv::FieldVector{T}, inds...) where {T} = + FieldVector{T}(column_args(_values(fv), inds...)) + struct FieldVectorStyle <: Base.Broadcast.AbstractArrayStyle{1} end Base.Broadcast.BroadcastStyle(::Type{<:FieldVector}) = FieldVectorStyle() diff --git a/src/Fields/indices.jl b/src/Fields/indices.jl index 51b6a2a5ce..eb1e403c8e 100644 --- a/src/Fields/indices.jl +++ b/src/Fields/indices.jl @@ -1,37 +1,9 @@ - -Base.@propagate_inbounds Base.getindex(field::Field, colidx::ColumnIndex) = - column(field, colidx) -Base.@propagate_inbounds function Base.getindex( - fv::FieldVector{T}, +const ColumnIndexable = + Union{Field, FieldVector, Base.AbstractBroadcasted, Spaces.AbstractSpace} +Base.@propagate_inbounds Base.getindex( + x::ColumnIndexable, colidx::ColumnIndex, -) where {T} - values = map(x -> x[colidx], _values(fv)) - return FieldVector{T, typeof(values)}(values) -end -Base.@propagate_inbounds function column( - field::SpectralElementField1D, - colidx::ColumnIndex{1}, -) - column(field, colidx.ij[1], colidx.h) -end -Base.@propagate_inbounds function column( - field::ExtrudedFiniteDifferenceField, - colidx::ColumnIndex{1}, -) - column(field, colidx.ij[1], colidx.h) -end -Base.@propagate_inbounds function column( - field::SpectralElementField2D, - colidx::ColumnIndex{2}, -) - column(field, colidx.ij[1], colidx.ij[2], colidx.h) -end -Base.@propagate_inbounds function column( - field::ExtrudedFiniteDifferenceField, - colidx::ColumnIndex{2}, -) - column(field, colidx.ij[1], colidx.ij[2], colidx.h) -end +) = column(x, colidx.ij..., colidx.h) """ Fields.bycolumn(fn, space) diff --git a/src/Operators/common.jl b/src/Operators/common.jl index 44a5ddef17..b86123e97b 100644 --- a/src/Operators/common.jl +++ b/src/Operators/common.jl @@ -139,3 +139,8 @@ end strip_space_args(::Tuple{}, space) = () strip_space_args(args::Tuple, space) = (strip_space(args[1], space), strip_space_args(Base.tail(args), space)...) + +function unstrip_space(field::Field, parent_space) + new_space = reconstruct_placeholder_space(axes(field), parent_space) + return Field(Fields.field_values(field), new_space) +end diff --git a/src/Operators/finitedifference.jl b/src/Operators/finitedifference.jl index 9e1143211a..207808a514 100644 --- a/src/Operators/finitedifference.jl +++ b/src/Operators/finitedifference.jl @@ -3299,6 +3299,15 @@ function Base.Broadcast.materialize!( ) end +Base.@propagate_inbounds column(op::FiniteDifferenceOperator, inds...) = + unionall_type(typeof(op))(column_args(op.bcs, inds...)) +Base.@propagate_inbounds column(sbc::StencilBroadcasted{S}, inds...) where {S} = + StencilBroadcasted{S}( + column(sbc.op, inds...), + column_args(sbc.args, inds...), + column(sbc.axes, inds...), + ) + #TODO: the optimizer dies with column broadcast expressions over a certain complexity if hasfield(Method, :recursion_relation) dont_limit = (args...) -> true diff --git a/src/Operators/integrals.jl b/src/Operators/integrals.jl index d13613efe3..7b20325b9e 100644 --- a/src/Operators/integrals.jl +++ b/src/Operators/integrals.jl @@ -1,315 +1,333 @@ -import ..RecursiveApply: rzero, ⊠, ⊞, radd, rmul -import ..Utilities -import ..DataLayouts +import ..RecursiveApply: rzero, ⊠, ⊞ import RootSolvers import ClimaComms """ - column_integral_definite!(∫field::Field, ᶜfield::Field) + column_integral_definite!(∫field, ᶜfield, [init]) -Sets `∫field```{}= \\int_0^{z_{max}}\\,```ᶜfield```(z)\\,dz``, where ``z_{max}`` -is the value of `z` at the top of the domain. The input `ᶜfield` must lie on a -cell-center space, and the output `∫field` must lie on the corresponding -horizontal space. +Sets `∫field```{}= \\int_{z_{min}}^{z_{max}}\\,```ᶜfield```(z)\\,dz +{}```init`, +where ``z_{min}`` and ``z_{max}`` are the values of `z` at the bottom and top of +the domain, respectively. The input `ᶜfield` must lie on a cell-center space, +and the output `∫field` must lie on the corresponding horizontal space. The +default value of `init` is 0. """ -column_integral_definite!(∫field::Fields.Field, ᶜfield::Fields.Field) = - column_integral_definite!(ClimaComms.device(axes(ᶜfield)), ∫field, ᶜfield) - -function column_integral_definite_kernel!( - ∫field, - ᶜfield::Fields.CenterFiniteDifferenceField, -) - _column_integral_definite!(∫field, ᶜfield) - return nothing -end - -column_integral_definite!( - ::ClimaComms.AbstractCPUDevice, - ∫field::Fields.SpectralElementField, - ᶜfield::Fields.ExtrudedFiniteDifferenceField, -) = - Fields.bycolumn(axes(ᶜfield)) do colidx - _column_integral_definite!(∫field[colidx], ᶜfield[colidx]) - end - -column_integral_definite!( - ::ClimaComms.AbstractCPUDevice, - ∫field::Fields.PointField, - ᶜfield::Fields.FiniteDifferenceField, -) = _column_integral_definite!(∫field, ᶜfield) - -function _column_integral_definite!(∫field, ᶜfield::Fields.ColumnField) - Δz = Fields.Δz_field(ᶜfield) - first_level = Operators.left_idx(axes(ᶜfield)) - last_level = Operators.right_idx(axes(ᶜfield)) - ∫field_data = Fields.field_values(∫field) - Base.setindex!(∫field_data, rzero(eltype(∫field))) - @inbounds for level in first_level:last_level - val = - ∫field_data[] ⊞ - Fields.level(ᶜfield, level)[] ⊠ Fields.level(Δz, level)[] - Base.setindex!(∫field_data, val) - end - return nothing +function column_integral_definite!(∫field, ᶜfield, init = rzero(eltype(∫field))) + ᶜfield_times_Δz = + Base.Broadcast.broadcasted(⊠, ᶜfield, Fields.Δz_field(ᶜfield)) + column_reduce!(⊞, ∫field, ᶜfield_times_Δz; init) end """ - column_integral_indefinite!(ᶠ∫field::Field, ᶜfield::Field) - -Sets `ᶠ∫field```(z) = \\int_0^z\\,```ᶜfield```(z')\\,dz'``. The input `ᶜfield` -must lie on a cell-center space, and the output `ᶠ∫field` must lie on the -corresponding cell-face space. + column_integral_indefinite!(ᶠ∫field, ᶜfield, [init]) - column_integral_indefinite!( - f::Function, - ᶠ∫field::Fields.ColumnField, - ϕ₀ = 0, - average = (ϕ⁻, ϕ⁺) -> (ϕ⁻ + ϕ⁺) / 2, - ) +Sets `ᶠ∫field```(z) = \\int_{z_{min}}^z\\,```ᶜfield```(z')\\,dz' +{}```init`, +where ``z_{min}`` is the value of `z` at the bottom of the domain. The input +`ᶜfield` must lie on a cell-center space, and the output `ᶠ∫field` must lie on +the corresponding cell-face space. The default value of `init` is 0. -The indefinite integral `ᶠ∫field = ϕ(z) = ∫ f(ϕ,z) dz` given: + column_integral_indefinite!(∂ϕ∂z, ᶠϕ, [ϕ₀], [rtol]) -- `f` the integral integrand function (which may be a function) -- `ᶠ∫field` the resulting (scalar) field `ϕ(z)` -- `ϕ₀` (optional) the boundary condition -- `average` (optional) a function to compute the cell center - average between two cell faces (`ϕ⁻, ϕ⁺`). +Sets `ᶠϕ```(z) = \\int_{z_{min}}^z\\,```∂ϕ∂z```(```ᶠϕ```(z'), z')\\,dz' +{}```ϕ₀` +for any scalar-valued two-argument function `∂ϕ∂z`. That is, the output `ᶠϕ` is +such that `ᶜgradᵥ.(ᶠϕ) ≈ ∂ϕ∂z.(ᶜint.(ᶠϕ), ᶜz)`, where `ᶜgradᵥ = GradientF2C()`, +`ᶜint = InterpolateF2C()`, and `ᶜz = Fields.coordinate_field(ᶜint.(ᶠϕ)).z`. The +approximation is accurate to a relative tolerance of `rtol`. The default value +of `ϕ₀` is 0, and the default value of `rtol` is 0.001. """ -column_integral_indefinite!(ᶠ∫field::Fields.Field, ᶜfield::Fields.Field) = - column_integral_indefinite!(ClimaComms.device(ᶠ∫field), ᶠ∫field, ᶜfield) - -column_integral_indefinite_kernel!( - ᶠ∫field::Fields.FaceFiniteDifferenceField, - ᶜfield::Fields.CenterFiniteDifferenceField, -) = _column_integral_indefinite!(ᶠ∫field, ᶜfield) - -column_integral_indefinite!( - ::ClimaComms.AbstractCPUDevice, - ᶠ∫field::Fields.FaceExtrudedFiniteDifferenceField, - ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, -) = - Fields.bycolumn(axes(ᶜfield)) do colidx - _column_integral_indefinite!(ᶠ∫field[colidx], ᶜfield[colidx]) - end - -column_integral_indefinite!( - ::ClimaComms.AbstractCPUDevice, - ᶠ∫field::Fields.FaceFiniteDifferenceField, - ᶜfield::Fields.CenterFiniteDifferenceField, -) = _column_integral_indefinite!(ᶠ∫field, ᶜfield) - -function _column_integral_indefinite!( - ᶠ∫field::Fields.ColumnField, - ᶜfield::Fields.ColumnField, +function column_integral_indefinite!( + ᶠ∫field, + ᶜfield, + init = rzero(eltype(ᶠ∫field)), ) - face_space = axes(ᶠ∫field) - ᶜΔz = Fields.Δz_field(ᶜfield) - first_level = Operators.left_idx(face_space) - last_level = Operators.right_idx(face_space) - @inbounds Fields.level(ᶠ∫field, first_level)[] = rzero(eltype(ᶜfield)) - @inbounds for level in (first_level + 1):last_level - Fields.level(ᶠ∫field, level)[] = - Fields.level(ᶠ∫field, level - 1)[] ⊞ - Fields.level(ᶜfield, level - half)[] ⊠ - Fields.level(ᶜΔz, level - half)[] + ᶜfield_times_Δz = + Base.Broadcast.broadcasted(⊠, ᶜfield, Fields.Δz_field(ᶜfield)) + column_accumulate!(⊞, ᶠ∫field, ᶜfield_times_Δz; init) +end +function column_integral_indefinite!( + ∂ϕ∂z::F, + ᶠϕ, + ϕ₀ = eltype(ᶠϕ)(0), + rtol = eltype(ᶠϕ)(0.001), +) where {F <: Function} + device = ClimaComms.device(ᶠϕ) + face_space = axes(ᶠϕ) + center_space = if face_space isa Spaces.FaceFiniteDifferenceSpace + Spaces.CenterFiniteDifferenceSpace(face_space) + elseif face_space isa Spaces.FaceExtrudedFiniteDifferenceSpace + Spaces.CenterExtrudedFiniteDifferenceSpace(face_space) + else + error("output of column_integral_indefinite! must be on cell faces") + end + ᶜz = Fields.coordinate_field(center_space).z + ᶜΔz = Fields.Δz_field(center_space) + ᶜz_and_Δz = Base.Broadcast.broadcasted(tuple, ᶜz, ᶜΔz) + column_accumulate!(ᶠϕ, ᶜz_and_Δz; init = ϕ₀) do ϕ_prev, (z, Δz) + residual(ϕ_new) = (ϕ_new - ϕ_prev) / Δz - ∂ϕ∂z((ϕ_prev + ϕ_new) / 2, z) + (; converged, root) = RootSolvers.find_zero( + residual, + RootSolvers.NewtonsMethodAD(ϕ_prev), + RootSolvers.CompactSolution(), + RootSolvers.RelativeSolutionTolerance(rtol), + ) + ClimaComms.@assert device converged "∂ϕ∂z could not be integrated over \ + z = $z with rtol set to $rtol" + return root end end -dual_space(space::Spaces.FaceExtrudedFiniteDifferenceSpace) = - Spaces.CenterExtrudedFiniteDifferenceSpace(space) -dual_space(space::Spaces.CenterExtrudedFiniteDifferenceSpace) = - Spaces.FaceExtrudedFiniteDifferenceSpace(space) +################################################################################ -dual_space(space::Spaces.FaceFiniteDifferenceSpace) = - Spaces.CenterFiniteDifferenceSpace(space) -dual_space(space::Spaces.CenterFiniteDifferenceSpace) = - Spaces.FaceFiniteDifferenceSpace(space) +const PointwiseOrColumnwiseBroadcasted = Union{ + Base.Broadcast.Broadcasted{ + <:Union{Fields.FieldStyle, Operators.AbstractStencilStyle}, + }, + Operators.StencilBroadcasted, +} -# First, dispatch on device: -column_integral_indefinite!( - f::Function, - ᶠ∫field::Fields.Field, - ϕ₀ = zero(eltype(ᶠ∫field)), - average = (ϕ⁻, ϕ⁺) -> (ϕ⁻ + ϕ⁺) / 2, -) = column_integral_indefinite!( - f, - ClimaComms.device(ᶠ∫field), - ᶠ∫field, - ϕ₀, - average, +Base.@propagate_inbounds function get_level_value( + space::Spaces.FiniteDifferenceSpace, + field_or_bc, + level, ) + is_periodic = Topologies.isperiodic(Spaces.vertical_topology(space)) + (_, lw, rw, _) = window_bounds(space, field_or_bc) + window = if !is_periodic && level < lw + LeftBoundaryWindow{Spaces.left_boundary_name(space)}() + elseif !is_periodic && level > rw + RightBoundaryWindow{Spaces.right_boundary_name(space)}() + else + Interior() + end + return getidx(space, field_or_bc, window, level, (1, 1, 1)) +end -##### -##### CPU -##### +""" + UnspecifiedInit() -column_integral_indefinite!( - f::Function, - ::ClimaComms.AbstractCPUDevice, - ᶠ∫field, - args..., -) = column_integral_indefinite_cpu!(f, ᶠ∫field, args...) +Analogue of `Base._InitialValue` for `column_reduce!` and `column_accumulate!`. +""" +struct UnspecifiedInit end -column_integral_indefinite!( - f::Function, - ::ClimaComms.AbstractCPUDevice, - ᶠ∫field::Fields.FaceExtrudedFiniteDifferenceField, - args..., -) = - Fields.bycolumn(axes(ᶠ∫field)) do colidx - column_integral_indefinite_cpu!(f, ᶠ∫field[colidx], args...) +""" + column_reduce!(f, output, input; [init], [transform]) + +Applies `reduce` to `input` along the vertical direction, storing the result in +`output`. The `input` can be either a `Field` or an `AbstractBroadcasted` that +performs pointwise or columnwise operations on `Field`s. Each reduced value is +computed by iteratively applying `f` to the values in `input`, starting from the +bottom of each column and moving upward, and the result of the final iteration +is passed to the `transform` function before being stored in `output`. If `init` +is specified, it is used as the initial value of the iteration; otherwise, the +value at the bottom of each column in `input` is used as the initial value. + +With `first_level` and `last_level` denoting the indices of the boundary levels +of `input`, the reduction in each column can be summarized as follows: + - If `init` is unspecified, + ``` + reduced_value = input[first_level] + for level in (first_level + 1):last_level + reduced_value = f(reduced_value, input[level]) + end + output[] = transform(reduced_value) + ``` + - If `init` is specified, + ``` + reduced_value = init + for level in first_level:last_level + reduced_value = f(reduced_value, input[level]) end + output[] = transform(reduced_value) + ``` +""" +function column_reduce!( + f::F, + output::Fields.Field, + input::Union{Fields.Field, PointwiseOrColumnwiseBroadcasted}; + init = UnspecifiedInit(), + transform::T = identity, +) where {F, T} + device = ClimaComms.device(output) + space = axes(input) + column_reduce_device!(device, f, transform, output, input, init, space) +end -#= -Function-based signature, solve for ϕ: -``` -∂ϕ/∂z = f(ϕ,z) -(ᶠϕ^{k+1}-ᶠϕ^{k})/ᶜΔz = ᶜf(ᶜϕ̄,ᶜz) -ᶜϕ̄ = (ϕ^{k+1}+ϕ^{k})/2 -(ᶠϕ^{k+1}-ᶠϕ^{k})/ᶜΔz = ᶜf((ᶠϕ^{k+1}+ᶠϕ^{k})/2,ᶜz) -root equation: (_ϕ-ϕ^{k})/Δz = f((_ϕ+ϕ^{k})/2,ᶜz) -``` -=# -function column_integral_indefinite_cpu!( - f::Function, - ᶠ∫field::Fields.ColumnField, - ϕ₀ = zero(eltype(ᶠ∫field)), - average = (ϕ⁻, ϕ⁺) -> (ϕ⁻ + ϕ⁺) / 2, -) - cspace = dual_space(axes(ᶠ∫field)) - ᶜzfield = Fields.coordinate_field(cspace) - face_space = axes(ᶠ∫field) - first_level = Operators.left_idx(face_space) - last_level = Operators.right_idx(face_space) - ᶜΔzfield = Fields.Δz_field(ᶜzfield) - @inbounds Fields.level(ᶠ∫field, first_level)[] = ϕ₀ - ϕ₁ = ϕ₀ - @inbounds for level in (first_level + 1):last_level - ᶜz = Fields.level(ᶜzfield.z, level - half)[] - ᶜΔz = Fields.level(ᶜΔzfield, level - half)[] - ϕ₀ = ϕ₁ - root_eq(_x) = (_x - ϕ₀) / ᶜΔz - f(average(_x, ϕ₀), ᶜz) - sol = RootSolvers.find_zero( - root_eq, - RootSolvers.NewtonsMethodAD(ϕ₀), - RootSolvers.CompactSolution(), +column_reduce_device!( + ::ClimaComms.AbstractCPUDevice, + f::F, + transform::T, + output, + input, + init, + space, +) where {F, T} = + space isa Spaces.FiniteDifferenceSpace ? + single_column_reduce!(f, transform, output, input, init, space) : + Fields.bycolumn(space) do colidx + single_column_reduce!( + f, + transform, + output[colidx], + input[colidx], + init, + space[colidx], ) - ϕ₁ = sol.root - f₁ = f(average(ϕ₀, ϕ₁), ᶜz) - ᶜintegrand_lev = f₁ - @inbounds Fields.level(ᶠ∫field, level)[] = - radd(Fields.level(ᶠ∫field, level - 1)[], rmul(ᶜintegrand_lev, ᶜΔz)) end + +# On GPUs, input and output go through strip_space to become _input and _output. +function single_column_reduce!( + f::F, + transform::T, + _output, + _input, + init, + space, +) where {F, T} + first_level = left_idx(space) + last_level = right_idx(space) + @inbounds if init == UnspecifiedInit() + reduced_value = get_level_value(space, _input, first_level) + next_level = first_level + 1 + else + reduced_value = init + next_level = first_level + end + @inbounds for level in next_level:last_level + reduced_value = f(reduced_value, get_level_value(space, _input, level)) + end + Fields.field_values(_output)[] = transform(reduced_value) return nothing end """ - column_mapreduce!(fn, op, reduced_field::Field, fields::Field...) - -Applies mapreduce along the vertical direction. The input `fields` must all lie -on the same space, and the output `reduced_field` must lie on the corresponding -horizontal space. The function `fn` is mapped over every input, and the function -`op` is used to reduce the outputs of `fn`. + column_accumulate!(f, output, input; [init], [transform]) + +Applies `accumulate` to `input` along the vertical direction, storing the result +in `output`. The `input` can be either a `Field` or an `AbstractBroadcasted` +that performs pointwise or columnwise operations on `Field`s. Each accumulated +value is computed by iteratively applying `f` to the values in `input`, starting +from the bottom of each column and moving upward, and the result of each +iteration is passed to the `transform` function before being stored in `output`. +The `init` value is is optional for center-to-center, face-to-face, and +face-to-center accumulation, but it is required for center-to-face accumulation. + +With `first_level` and `last_level` denoting the indices of the boundary levels +of `input`, the accumulation in each column can be summarized as follows: + - For center-to-center and face-to-face accumulation with `init` unspecified, + ``` + accumulated_value = input[first_level] + output[first_level] = transform(accumulated_value) + for level in (first_level + 1):last_level + accumulated_value = f(accumulated_value, input[level]) + output[level] = transform(accumulated_value) + end + ``` + - For center-to-center and face-to-face accumulation with `init` specified, + ``` + accumulated_value = init + for level in first_level:last_level + accumulated_value = f(accumulated_value, input[level]) + output[level] = transform(accumulated_value) + end + ``` + - For face-to-center accumulation with `init` unspecified, + ``` + accumulated_value = input[first_level] + for level in (first_level + 1):last_level + accumulated_value = f(accumulated_value, input[level]) + output[level - half] = transform(accumulated_value) + end + ``` + - For face-to-center accumulation with `init` specified, + ``` + accumulated_value = f(init, input[first_level]) + for level in (first_level + 1):last_level + accumulated_value = f(accumulated_value, input[level]) + output[level - half] = transform(accumulated_value) + end + ``` + - For center-to-face accumulation, + ``` + accumulated_value = init + output[first_level - half] = transform(accumulated_value) + for level in first_level:last_level + accumulated_value = f(accumulated_value, input[level]) + output[level + half] = transform(accumulated_value) + end + ``` """ -column_mapreduce!( - fn::F, - op::O, - reduced_field::Fields.Field, - fields::Fields.Field..., -) where {F, O} = column_mapreduce_device!( - ClimaComms.device(reduced_field), - fn, - op, - reduced_field, - fields..., -) - -column_mapreduce_kernel!(fn::F, op::O, reduced_field, fields...) where {F, O} = - _column_mapreduce!(fn, op, reduced_field, fields...) +function column_accumulate!( + f::F, + output::Fields.Field, + input::Union{Fields.Field, PointwiseOrColumnwiseBroadcasted}; + init = UnspecifiedInit(), + transform::T = identity, +) where {F, T} + device = ClimaComms.device(output) + space = axes(input) + init == UnspecifiedInit() && + Spaces.staggering(space) == Spaces.CellCenter() && + Spaces.staggering(axes(output)) == Spaces.CellFace() && + error("init must be specified for center-to-face accumulation") + column_accumulate_device!(device, f, transform, output, input, init, space) +end -column_mapreduce_device!( +column_accumulate_device!( ::ClimaComms.AbstractCPUDevice, - fn::F, - op::O, - reduced_field::Fields.SpectralElementField, - fields::Fields.ExtrudedFiniteDifferenceField..., -) where {F, O} = - Fields.bycolumn(axes(reduced_field)) do colidx - column_fields = map(field -> field[colidx], fields) - _column_mapreduce!(fn, op, reduced_field[colidx], column_fields...) + f::F, + transform::T, + output, + input, + init, + space, +) where {F, T} = + space isa Spaces.FiniteDifferenceSpace ? + single_column_accumulate!(f, transform, output, input, init, space) : + Fields.bycolumn(space) do colidx + single_column_accumulate!( + f, + transform, + output[colidx], + input[colidx], + init, + space[colidx], + ) end -column_mapreduce_device!( - ::ClimaComms.AbstractCPUDevice, - fn::F, - op::O, - reduced_field::Fields.PointField, - fields::Fields.FiniteDifferenceField..., -) where {F, O} = _column_mapreduce!(fn, op, reduced_field, fields...) - -function _column_mapreduce!(fn::F, op::O, reduced_field, fields...) where {F, O} - space = axes(first(fields)) - for field in Base.tail(fields) # Base.rest breaks on the gpu - axes(field) === space || - error("All inputs to column_mapreduce must lie on the same space") +# On GPUs, input and output go through strip_space to become _input and _output. +function single_column_accumulate!( + f::F, + transform::T, + _output, + _input, + init, + space, +) where {F, T} + device = ClimaComms.device(space) + first_level = left_idx(space) + last_level = right_idx(space) + output = unstrip_space(_output, space) + is_c2c_or_f2f = Spaces.staggering(space) == Spaces.staggering(axes(output)) + is_c2f = !is_c2c_or_f2f && Spaces.staggering(space) == Spaces.CellCenter() + is_f2c = !is_c2c_or_f2f && !is_c2f + @inbounds if init == UnspecifiedInit() + @assert !is_c2f + accumulated_value = get_level_value(space, _input, first_level) + next_level = first_level + 1 + init_output_level = is_c2c_or_f2f ? first_level : nothing + else + accumulated_value = + is_f2c ? f(init, get_level_value(space, _input, first_level)) : init + next_level = is_f2c ? first_level + 1 : first_level + init_output_level = is_c2f ? first_level - half : nothing end - (_, _, _, Nv, _) = size(Fields.field_values(first(fields))) - first_level = left_boundary_idx(Nv, space) - last_level = right_boundary_idx(Nv, space) - # TODO: This code is allocating memory. In particular, even if we comment - # out the rest of this function, the first line alone allocates memory. - # This problem is not fixed by replacing map with ntuple or unrolled_map. - fields_data = map(field -> Fields.field_values(field), fields) - first_level_values = map( - field_data -> - (@inbounds data_level(field_data, space, first_level)[]), - fields_data, - ) - reduced_field_data = Fields.field_values(reduced_field) - Base.setindex!(reduced_field_data, fn(first_level_values...)) - for level in (first_level + 1):last_level - values = map( - field_data -> - (@inbounds data_level(field_data, space, level)[]), - fields_data, - ) - Base.setindex!( - reduced_field_data, - op(reduced_field_data[], fn(values...)), - ) + @inbounds if !isnothing(init_output_level) + Fields.level(output, init_output_level)[] = transform(accumulated_value) + end + @inbounds for level in next_level:last_level + accumulated_value = + f(accumulated_value, get_level_value(space, _input, level)) + output_level = + is_c2c_or_f2f ? level : (is_c2f ? level + half : level - half) + Fields.level(output, output_level)[] = transform(accumulated_value) end - return nothing end - -import ..Utilities -Base.@propagate_inbounds data_level( - data, - ::Operators.CenterPlaceholderSpace, - v::Int, -) = DataLayouts.level(data, v) -Base.@propagate_inbounds data_level( - data, - ::Spaces.CenterFiniteDifferenceSpace, - v::Int, -) = DataLayouts.level(data, v) - -Base.@propagate_inbounds data_level( - data, - ::Operators.FacePlaceholderSpace, - v::Utilities.PlusHalf, -) = DataLayouts.level(data, v.i + 1) -Base.@propagate_inbounds data_level( - data, - ::Spaces.FaceFiniteDifferenceSpace, - v::Utilities.PlusHalf, -) = DataLayouts.level(data, v.i + 1) - -left_boundary_idx(n, ::Operators.CenterPlaceholderSpace) = 1 -right_boundary_idx(n, ::Operators.CenterPlaceholderSpace) = n -left_boundary_idx(n, ::Operators.FacePlaceholderSpace) = Utilities.half -right_boundary_idx(n, ::Operators.FacePlaceholderSpace) = n - Utilities.half - -left_boundary_idx(n, ::Spaces.CenterFiniteDifferenceSpace) = 1 -right_boundary_idx(n, ::Spaces.CenterFiniteDifferenceSpace) = n -left_boundary_idx(n, ::Spaces.FaceFiniteDifferenceSpace) = Utilities.half -right_boundary_idx(n, ::Spaces.FaceFiniteDifferenceSpace) = n - Utilities.half diff --git a/src/interface.jl b/src/interface.jl index ceb5217eda..9148890415 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -39,5 +39,7 @@ Base.@propagate_inbounds column_args(args::Tuple, inds...) = Base.@propagate_inbounds column_args(args::Tuple{Any}, inds...) = (column(args[1], inds...),) Base.@propagate_inbounds column_args(args::Tuple{}, inds...) = () +Base.@propagate_inbounds column_args(args::NamedTuple, inds...) = + NamedTuple{keys(args)}(column_args(values(args), inds...)) function level end diff --git a/test/Operators/integrals.jl b/test/Operators/integrals.jl index decc073622..9ed2b5268c 100644 --- a/test/Operators/integrals.jl +++ b/test/Operators/integrals.jl @@ -5,9 +5,11 @@ import ClimaComms ClimaComms.@import_required_backends import ClimaCore import ClimaCore: Spaces, Fields, Operators -import ClimaCore.RecursiveApply: rmax import ClimaCore.Operators: - column_integral_definite!, column_integral_indefinite!, column_mapreduce! + column_integral_definite!, + column_integral_indefinite!, + column_reduce!, + column_accumulate! include( joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"), @@ -20,19 +22,14 @@ center_to_face_space(center_space::Spaces.CenterFiniteDifferenceSpace) = center_to_face_space(center_space::Spaces.CenterExtrudedFiniteDifferenceSpace) = Spaces.FaceExtrudedFiniteDifferenceSpace(center_space) -test_allocs(lim::Nothing, allocs, caller) = nothing -function test_allocs(lim::Int, allocs, caller) - @assert allocs ≥ 0 - if lim == 0 +test_allocs(allocs) = + if ClimaComms.device() isa ClimaComms.AbstractCPUDevice @test allocs == 0 else - @test allocs ≤ lim - allocs < lim && @show allocs, lim, caller - @test_broken allocs == 0 # notify when things improve + @test allocs ≤ 5000 # GPU always has ~2 kB of non-deterministic allocs. end -end -function test_column_integral_definite!(center_space, alloc_lim) +function test_column_integral_definite!(center_space) face_space = center_to_face_space(center_space) ᶜz = Fields.coordinate_field(center_space).z ᶠz = Fields.coordinate_field(face_space).z @@ -53,11 +50,10 @@ function test_column_integral_definite!(center_space, alloc_lim) cuda = (AnyFrameModule(CUDA),) @test_opt ignored_modules = cuda column_integral_definite!(∫u_test, ᶜu) - allocs = @allocated column_integral_definite!(∫u_test, ᶜu) - test_allocs(alloc_lim, allocs, "test_column_integral_definite") + test_allocs(@allocated column_integral_definite!(∫u_test, ᶜu)) end -function test_column_integral_indefinite!(center_space, alloc_lim) +function test_column_integral_indefinite!(center_space) face_space = center_to_face_space(center_space) ᶜz = Fields.coordinate_field(center_space).z ᶠz = Fields.coordinate_field(face_space).z @@ -74,27 +70,14 @@ function test_column_integral_indefinite!(center_space, alloc_lim) cuda = (AnyFrameModule(CUDA),) @test_opt ignored_modules = cuda column_integral_indefinite!(ᶠ∫u_test, ᶜu) - allocs = @allocated column_integral_indefinite!(ᶠ∫u_test, ᶜu) - test_allocs(alloc_lim, allocs, "test_column_integral_indefinite!") + test_allocs(@allocated column_integral_indefinite!(ᶠ∫u_test, ᶜu)) end -function test_column_integral_indefinite_fn!(center_space, alloc_lim) +function test_column_integral_indefinite_fn!(center_space) face_space = center_to_face_space(center_space) ᶜz = Fields.coordinate_field(center_space).z ᶠz = Fields.coordinate_field(face_space).z - FT = Spaces.undertype(center_space) - - ᶜu = Dict() - ᶠ∫u_ref = Dict() - ᶠ∫u_test = Dict() - - - # ᶜu = map(z -> (; one = one(z), powers = (z, z^2, z^3)), ᶜz) - # ᶠ∫u_ref = map(z -> (; one = z, powers = (z^2 / 2, z^3 / 3, z^4 / 4)), ᶠz) - # ᶠ∫u_test = similar(ᶠ∫u_ref) - for (i, fn) in enumerate(((ϕ, z) -> z, (ϕ, z) -> z^2, (ϕ, z) -> z^3)) - ᶜu = ᶜz .^ i ᶠ∫u_ref = ᶠz .^ (i + 1) ./ (i + 1) ᶠ∫u_test = similar(ᶠ∫u_ref) @@ -107,136 +90,79 @@ function test_column_integral_indefinite_fn!(center_space, alloc_lim) maximum(@. abs((ref_array - test_array) / ref_array)) @test max_relative_error <= 0.006 # Less than 0.6% error at the top level. - # @test_opt column_integral_indefinite!(fn, ᶠ∫u_test) + cuda = (AnyFrameModule(CUDA),) + @test_opt ignored_modules = cuda column_integral_indefinite!( + fn, + ᶠ∫u_test, + ) - allocs = @allocated column_integral_indefinite!(fn, ᶠ∫u_test) - test_allocs(alloc_lim, allocs, "test_column_integral_indefinite_fn!") + test_allocs(@allocated column_integral_indefinite!(fn, ᶠ∫u_test)) end - end -function test_column_mapreduce!(space, alloc_lim) - z_field = Fields.coordinate_field(space).z - z_top_field = Fields.level(z_field, Operators.right_idx(space)) - sin_field = @. sin(pi * z_field / z_top_field) - square_and_sin(z, sin_value) = (; square = z^2, sin = sin_value) - device = ClimaComms.device(z_field) - reduced_field_ref = ClimaComms.allowscalar(device) do - map(z -> (; square = z^2, sin = one(z)), z_top_field) +function test_column_reduce_and_accumulate!(center_space) + face_space = center_to_face_space(center_space) + ᶜwhole_number = ones(center_space) + column_accumulate!(+, ᶜwhole_number, ᶜwhole_number) # 1:Nv per column + ᶠwhole_number = ones(face_space) + column_accumulate!(+, ᶠwhole_number, ᶠwhole_number) # 1:(Nv + 1) per column + + safe_binomial(n, k) = binomial(Int32(n), Int32(k)) # GPU-compatible binomial + + # https://en.wikipedia.org/wiki/Motzkin_number#Properties + motzkin_number(n) = + sum(0:(n / 2); init = zero(n)) do k + safe_binomial(n, 2 * k) * safe_binomial(2 * k, k) / (k + 1) + end + recursive_motzkin_number((mₙ₋₁, mₙ₋₂), n) = + ((2 * n + 1) * mₙ₋₁ + (3 * n - 3) * mₙ₋₂) / (n + 2) + + # On step n of the reduction/accumulation, update (mₙ₋₁, mₙ₋₂) to (mₙ, mₙ₋₁). + f((mₙ₋₁, mₙ₋₂), n) = (recursive_motzkin_number((mₙ₋₁, mₙ₋₂), n), mₙ₋₁) + init = (1, 0) # m₀ = 1, m₋₁ = 0 (m₋₁ can be set to any finite value) + transform = first # Get mₙ from each (mₙ, mₙ₋₁) pair before saving to output. + + for input in (ᶜwhole_number, ᶠwhole_number) + last_input_level = Fields.level(input, Operators.right_idx(axes(input))) + output = similar(last_input_level) + reference_output = motzkin_number.(last_input_level) + + set_output! = () -> column_reduce!(f, output, input; init, transform) + set_output!() + @test output == reference_output + @test_opt ignored_modules = (AnyFrameModule(CUDA),) set_output!() + test_allocs(@allocated set_output!()) end - reduced_field_test = similar(reduced_field_ref) - args = (square_and_sin, rmax, reduced_field_test, z_field, sin_field) - column_mapreduce!(args...) - ref_array = parent(reduced_field_ref) - test_array = parent(reduced_field_test) - max_relative_error = maximum(@. abs((ref_array - test_array) / ref_array)) - @test max_relative_error <= 0.004 # Less than 0.4% error. - - cuda = (AnyFrameModule(CUDA),) - if alloc_lim == 0 - @test_opt ignored_modules = cuda column_mapreduce!(args...) + ᶜoutput = similar(ᶜwhole_number) + ᶠoutput = similar(ᶠwhole_number) + for (input, output, reference_output) in ( + (ᶜwhole_number, ᶜoutput, motzkin_number.(ᶜwhole_number)), + (ᶠwhole_number, ᶠoutput, motzkin_number.(ᶠwhole_number)), + (ᶠwhole_number, ᶜoutput, motzkin_number.(ᶜwhole_number .+ 1)), + (ᶜwhole_number, ᶠoutput, motzkin_number.(ᶠwhole_number .- 1)), + ) + set_output! = + () -> column_accumulate!(f, output, input; init, transform) + set_output!() + @test output == reference_output + @test_opt ignored_modules = (AnyFrameModule(CUDA),) set_output!() + test_allocs(@allocated set_output!()) end - allocs = @allocated column_mapreduce!(args...) - test_allocs(alloc_lim, allocs, "test_column_mapreduce!") end @testset "Integral operations unit tests" begin - # device = ClimaComms.CPUSingleThreaded(); device = ClimaComms.device() context = ClimaComms.SingletonCommsContext(device) - broken = device isa ClimaComms.CUDADevice - if device isa ClimaComms.CPUSingleThreaded - i_lim = Dict() - i_lim[(1, Float32)] = 0 - i_lim[(2, Float32)] = 0 - i_lim[(3, Float32)] = 0 - i_lim[(4, Float32)] = 0 - - i_lim[(1, Float64)] = 0 - i_lim[(2, Float64)] = 0 - i_lim[(3, Float64)] = 0 - i_lim[(4, Float64)] = 0 - - lim = Dict() - lim[(1, Float32)] = 2768 - lim[(2, Float32)] = 2960 - lim[(3, Float32)] = 8183808 - lim[(4, Float32)] = 8650752 - - lim[(1, Float64)] = 3648 - lim[(2, Float64)] = 3920 - lim[(3, Float64)] = 9510912 - lim[(4, Float64)] = 10100736 - else - i_lim = Dict() - i_lim[(1, Float32)] = 1728 - i_lim[(2, Float32)] = 4720 - i_lim[(3, Float32)] = 2304 - i_lim[(4, Float32)] = 8176 - - i_lim[(1, Float64)] = 1936 - i_lim[(2, Float64)] = 4720 - i_lim[(3, Float64)] = 2544 - i_lim[(4, Float64)] = 8176 - - lim = Dict() - lim[(1, Float32)] = 34144 - lim[(2, Float32)] = 37600 - lim[(3, Float32)] = 4399104 - lim[(4, Float32)] = 4571136 - - lim[(1, Float64)] = 35024 - lim[(2, Float64)] = 38560 - lim[(3, Float64)] = 5455872 - lim[(4, Float64)] = 5726208 - end - if are_boundschecks_forced - lim = Dict(k => nothing for k in keys(lim)) - i_lim = Dict(k => nothing for k in keys(lim)) - end - for FT in (Float32, Float64) - test_column_integral_definite!( + for space in ( TU.ColumnCenterFiniteDifferenceSpace(FT; context), - i_lim[(1, FT)], - ) - test_column_integral_definite!( TU.CenterExtrudedFiniteDifferenceSpace(FT; context), - i_lim[(2, FT)], - ) - test_column_integral_indefinite!( - TU.ColumnCenterFiniteDifferenceSpace(FT; context), - i_lim[(3, FT)], - ) - test_column_integral_indefinite!( - TU.CenterExtrudedFiniteDifferenceSpace(FT; context), - i_lim[(4, FT)], - ) - broken || test_column_integral_indefinite_fn!( - TU.ColumnCenterFiniteDifferenceSpace(FT; context), - i_lim[(3, FT)], - ) - broken || test_column_integral_indefinite_fn!( - TU.CenterExtrudedFiniteDifferenceSpace(FT; context), - i_lim[(4, FT)], - ) - - broken || test_column_mapreduce!( - TU.ColumnCenterFiniteDifferenceSpace(FT; context), - lim[(1, FT)], - ) - broken || test_column_mapreduce!( - TU.ColumnFaceFiniteDifferenceSpace(FT; context), - lim[(2, FT)], - ) - broken || test_column_mapreduce!( - TU.CenterExtrudedFiniteDifferenceSpace(FT; context), - lim[(3, FT)], - ) - broken || test_column_mapreduce!( - TU.FaceExtrudedFiniteDifferenceSpace(FT; context), - lim[(4, FT)], ) + test_column_integral_definite!(space) + test_column_integral_indefinite!(space) + test_column_integral_indefinite_fn!(space) + test_column_reduce_and_accumulate!(space) + end end end