diff --git a/ext/cuda/cuda_utils.jl b/ext/cuda/cuda_utils.jl index d9ee184d78..15ee90ce34 100644 --- a/ext/cuda/cuda_utils.jl +++ b/ext/cuda/cuda_utils.jl @@ -3,11 +3,6 @@ import ClimaCore.Fields import ClimaCore.DataLayouts import ClimaCore.DataLayouts: empty_kernel_stats -get_n_items(field::Fields.Field) = get_n_items(Fields.field_values(field)) -get_n_items(data::DataLayouts.AbstractData) = get_n_items(size(data)) -get_n_items(arr::AbstractArray) = get_n_items(size(parent(arr))) -get_n_items(tup::Tuple) = prod(tup) - const reported_stats = Dict() # Call via ClimaCore.DataLayouts.empty_kernel_stats() empty_kernel_stats(::ClimaComms.CUDADevice) = empty!(reported_stats) @@ -37,7 +32,7 @@ to benchmark compare against auto-determined threads/blocks (if `auto=false`). function auto_launch!( f!::F!, args, - data; + nitems::Union{Integer, Nothing} = nothing; auto = false, threads_s = nothing, blocks_s = nothing, @@ -45,7 +40,7 @@ function auto_launch!( caller = :unknown, ) where {F!} if auto - nitems = get_n_items(data) + @assert !isnothing(nitems) if nitems ≥ 0 kernel = CUDA.@cuda always_inline = true launch = false f!(args...) config = CUDA.launch_configuration(kernel.fun) @@ -64,7 +59,7 @@ function auto_launch!( # CUDA.registers(kernel) > 50 || return nothing # for debugging # occursin("single_field_solve_kernel", string(nameof(F!))) || return nothing if !haskey(reported_stats, key) - nitems = get_n_items(data) + @assert !isnothing(nitems) kernel = CUDA.@cuda always_inline = true launch = false f!(args...) 
config = CUDA.launch_configuration(kernel.fun) threads = min(nitems, config.threads) diff --git a/ext/cuda/data_layouts_copyto.jl b/ext/cuda/data_layouts_copyto.jl index d32b6aee54..82cf46d88c 100644 --- a/ext/cuda/data_layouts_copyto.jl +++ b/ext/cuda/data_layouts_copyto.jl @@ -23,8 +23,7 @@ function Base.copyto!( if Nh > 0 auto_launch!( knl_copyto!, - (dest, bc), - dest; + (dest, bc); threads_s = (Nij, Nij), blocks_s = (Nh, 1), ) @@ -42,8 +41,7 @@ function Base.copyto!( Nv_blocks = cld(Nv, Nv_per_block) auto_launch!( knl_copyto!, - (dest, bc), - dest; + (dest, bc); threads_s = (Nij, Nij, Nv_per_block), blocks_s = (Nh, Nv_blocks), ) @@ -59,8 +57,7 @@ function Base.copyto!( if Nv > 0 auto_launch!( knl_copyto!, - (dest, bc), - dest; + (dest, bc); threads_s = (1, 1), blocks_s = (1, Nv), ) @@ -73,13 +70,7 @@ function Base.copyto!( bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA, ) where {S} - auto_launch!( - knl_copyto!, - (dest, bc), - dest; - threads_s = (1, 1), - blocks_s = (1, 1), - ) + auto_launch!(knl_copyto!, (dest, bc); threads_s = (1, 1), blocks_s = (1, 1)) return dest end @@ -100,7 +91,8 @@ function cuda_copyto!(dest::AbstractData, bc) (_, _, Nv, _, Nh) = DataLayouts.universal_size(dest) us = DataLayouts.UniversalSize(dest) if Nv > 0 && Nh > 0 - auto_launch!(knl_copyto_flat!, (dest, bc, us), dest; auto = true) + nitems = prod(DataLayouts.universal_size(dest)) + auto_launch!(knl_copyto_flat!, (dest, bc, us), nitems; auto = true) end return dest end diff --git a/ext/cuda/data_layouts_fill.jl b/ext/cuda/data_layouts_fill.jl index 087d5f2a84..cac5bdf526 100644 --- a/ext/cuda/data_layouts_fill.jl +++ b/ext/cuda/data_layouts_fill.jl @@ -14,7 +14,8 @@ function cuda_fill!(dest::AbstractData, val) (_, _, Nv, _, Nh) = DataLayouts.universal_size(dest) us = DataLayouts.UniversalSize(dest) if Nv > 0 && Nh > 0 - auto_launch!(knl_fill_flat!, (dest, val, us), dest; auto = true) + nitems = prod(DataLayouts.universal_size(dest)) + auto_launch!(knl_fill_flat!, (dest, val, 
us), nitems; auto = true) end return dest end diff --git a/ext/cuda/data_layouts_fused_copyto.jl b/ext/cuda/data_layouts_fused_copyto.jl index a566e69a5f..0b1d1126d1 100644 --- a/ext/cuda/data_layouts_fused_copyto.jl +++ b/ext/cuda/data_layouts_fused_copyto.jl @@ -50,8 +50,7 @@ function fused_copyto!( Nv_blocks = cld(Nv, Nv_per_block) auto_launch!( knl_fused_copyto!, - (fmbc,), - dest1; + (fmbc,); threads_s = (Nij, Nij, Nv_per_block), blocks_s = (Nh, Nv_blocks), ) @@ -68,8 +67,7 @@ function fused_copyto!( if Nh > 0 auto_launch!( knl_fused_copyto!, - (fmbc,), - dest1; + (fmbc,); threads_s = (Nij, Nij), blocks_s = (Nh, 1), ) @@ -85,8 +83,7 @@ function fused_copyto!( if Nv > 0 && Nh > 0 auto_launch!( knl_fused_copyto!, - (fmbc,), - dest1; + (fmbc,); threads_s = (1, 1), blocks_s = (Nh, Nv), ) @@ -101,8 +98,7 @@ function fused_copyto!( ) where {S} auto_launch!( knl_fused_copyto!, - (fmbc,), - dest1; + (fmbc,); threads_s = (1, 1), blocks_s = (1, 1), ) diff --git a/ext/cuda/data_layouts_mapreduce.jl b/ext/cuda/data_layouts_mapreduce.jl index 5d63cf365d..18435eade6 100644 --- a/ext/cuda/data_layouts_mapreduce.jl +++ b/ext/cuda/data_layouts_mapreduce.jl @@ -28,7 +28,7 @@ function mapreduce_cuda( pdata = parent(data) T = eltype(pdata) (Ni, Nj, Nk, Nv, Nh) = size(data) - Nf = div(length(pdata), prod(size(data))) # length of field dimension + Nf = DataLayouts.ncomponents(data) # length of field dimension pwt = parent(weighted_jacobian) nitems = Nv * Ni * Nj * Nk * Nh diff --git a/ext/cuda/limiters.jl b/ext/cuda/limiters.jl index 7511c279f4..a7dd6e393a 100644 --- a/ext/cuda/limiters.jl +++ b/ext/cuda/limiters.jl @@ -21,23 +21,15 @@ function compute_element_bounds!( ρ, ::ClimaComms.CUDADevice, ) - S = size(Fields.field_values(ρ)) - (Ni, Nj, _, Nv, Nh) = S + ρ_values = Fields.field_values(Operators.strip_space(ρ, axes(ρ))) + ρq_values = Fields.field_values(Operators.strip_space(ρq, axes(ρq))) + (_, _, _, Nv, Nh) = DataLayouts.universal_size(ρ_values) nthreads, nblocks = 
config_threadblock(Nv, Nh) - args = ( - limiter, - Fields.field_values(Operators.strip_space(ρq, axes(ρq))), - Fields.field_values(Operators.strip_space(ρ, axes(ρ))), - Nv, - Nh, - Val(Ni), - Val(Nj), - ) + args = (limiter, ρq_values, ρ_values) auto_launch!( compute_element_bounds_kernel!, - args, - ρ; + args; threads_s = nthreads, blocks_s = nblocks, ) @@ -45,15 +37,8 @@ function compute_element_bounds!( end -function compute_element_bounds_kernel!( - limiter, - ρq, - ρ, - Nv, - Nh, - ::Val{Ni}, - ::Val{Nj}, -) where {Ni, Nj} +function compute_element_bounds_kernel!(limiter, ρq, ρ) + (Ni, Nj, _, Nv, Nh) = DataLayouts.universal_size(ρ) n = (Nv, Nh) tidx = thread_index() @inbounds if valid_range(tidx, prod(n)) @@ -88,21 +73,18 @@ function compute_neighbor_bounds_local!( ::ClimaComms.CUDADevice, ) topology = Spaces.topology(axes(ρ)) - Ni, Nj, _, Nv, Nh = size(Fields.field_values(ρ)) + us = DataLayouts.UniversalSize(Fields.field_values(ρ)) + (_, _, _, Nv, Nh) = DataLayouts.universal_size(us) nthreads, nblocks = config_threadblock(Nv, Nh) args = ( limiter, topology.local_neighbor_elem, topology.local_neighbor_elem_offset, - Nv, - Nh, - Val(Ni), - Val(Nj), + us, ) auto_launch!( compute_neighbor_bounds_local_kernel!, - args, - ρ; + args; threads_s = nthreads, blocks_s = nblocks, ) @@ -112,12 +94,9 @@ function compute_neighbor_bounds_local_kernel!( limiter, local_neighbor_elem, local_neighbor_elem_offset, - Nv, - Nh, - ::Val{Ni}, - ::Val{Nj}, -) where {Ni, Nj} - + us::DataLayouts.UniversalSize, +) + (_, _, _, Nv, Nh) = DataLayouts.universal_size(us) n = (Nv, Nh) tidx = thread_index() @inbounds if valid_range(tidx, prod(n)) @@ -147,9 +126,10 @@ function apply_limiter!( ::ClimaComms.CUDADevice, ) ρq_data = Fields.field_values(ρq) - (Ni, Nj, _, Nv, Nh) = size(ρq_data) - Nf = DataLayouts.ncomponents(ρq_data) + us = DataLayouts.UniversalSize(ρq_data) + (Ni, Nj, _, Nv, Nh) = DataLayouts.universal_size(us) maxiter = Ni * Nj + Nf = DataLayouts.ncomponents(ρq_data) WJ = 
Spaces.local_geometry_data(axes(ρq)).WJ nthreads, nblocks = config_threadblock(Nv, Nh) args = ( @@ -157,17 +137,13 @@ function apply_limiter!( Fields.field_values(Operators.strip_space(ρq, axes(ρq))), Fields.field_values(Operators.strip_space(ρ, axes(ρ))), WJ, - Nv, - Nh, + us, Val(Nf), - Val(Ni), - Val(Nj), Val(maxiter), ) auto_launch!( apply_limiter_kernel!, - args, - ρ; + args; threads_s = nthreads, blocks_s = nblocks, ) @@ -179,15 +155,13 @@ function apply_limiter_kernel!( ρq_data, ρ_data, WJ_data, - Nv, - Nh, + us::DataLayouts.UniversalSize, ::Val{Nf}, - ::Val{Ni}, - ::Val{Nj}, ::Val{maxiter}, -) where {Nf, Ni, Nj, maxiter} +) where {Nf, maxiter} (; q_bounds_nbr, rtol) = limiter converged = true + (Ni, Nj, _, Nv, Nh) = DataLayouts.universal_size(us) n = (Nv, Nh) tidx = thread_index() @inbounds if valid_range(tidx, prod(n)) diff --git a/ext/cuda/matrix_fields_multiple_field_solve.jl b/ext/cuda/matrix_fields_multiple_field_solve.jl index afd5c4adb8..d4150da5bc 100644 --- a/ext/cuda/matrix_fields_multiple_field_solve.jl +++ b/ext/cuda/matrix_fields_multiple_field_solve.jl @@ -38,8 +38,7 @@ NVTX.@annotate function multiple_field_solve!( auto_launch!( multiple_field_solve_kernel!, - args, - x1; + args; threads_s = nthreads, blocks_s = nblocks, always_inline = true, diff --git a/ext/cuda/matrix_fields_single_field_solve.jl b/ext/cuda/matrix_fields_single_field_solve.jl index c1149a1e8a..67e520a823 100644 --- a/ext/cuda/matrix_fields_single_field_solve.jl +++ b/ext/cuda/matrix_fields_single_field_solve.jl @@ -21,8 +21,7 @@ function single_field_solve!(device::ClimaComms.CUDADevice, cache, x, A, b) args = (device, cache, x, A, b) auto_launch!( single_field_solve_kernel!, - args, - x; + args; threads_s = nthreads, blocks_s = nblocks, ) diff --git a/ext/cuda/operators_finite_difference.jl b/ext/cuda/operators_finite_difference.jl index 980e5a813a..1cb8407136 100644 --- a/ext/cuda/operators_finite_difference.jl +++ b/ext/cuda/operators_finite_difference.jl @@ -36,8 +36,7 
@@ function Base.copyto!( (strip_space(out, space), strip_space(bc, space), axes(out), bounds, us) auto_launch!( copyto_stencil_kernel!, - args, - out; + args; threads_s = (nthreads,), blocks_s = (nblocks,), ) diff --git a/ext/cuda/operators_integral.jl b/ext/cuda/operators_integral.jl index 651a010a47..cf5e5d2ac8 100644 --- a/ext/cuda/operators_integral.jl +++ b/ext/cuda/operators_integral.jl @@ -29,7 +29,7 @@ function column_reduce_device!( init, space, ) - auto_launch!(bycolumn_kernel!, args, (); threads_s, blocks_s) + auto_launch!(bycolumn_kernel!, args; threads_s, blocks_s) end function column_accumulate_device!( @@ -52,7 +52,7 @@ function column_accumulate_device!( init, space, ) - auto_launch!(bycolumn_kernel!, args, (); threads_s, blocks_s) + auto_launch!(bycolumn_kernel!, args; threads_s, blocks_s) end bycolumn_kernel!( diff --git a/ext/cuda/operators_spectral_element.jl b/ext/cuda/operators_spectral_element.jl index d99400ba97..47c6cb1c82 100644 --- a/ext/cuda/operators_spectral_element.jl +++ b/ext/cuda/operators_spectral_element.jl @@ -51,8 +51,7 @@ function Base.copyto!( ) auto_launch!( copyto_spectral_kernel!, - args, - out; + args; threads_s = (Nq, Nq, Nvthreads), blocks_s = (Nh, Nvblocks), ) diff --git a/ext/cuda/operators_thomas_algorithm.jl b/ext/cuda/operators_thomas_algorithm.jl index 9d416dfc7c..83546518ab 100644 --- a/ext/cuda/operators_thomas_algorithm.jl +++ b/ext/cuda/operators_thomas_algorithm.jl @@ -10,8 +10,7 @@ function column_thomas_solve!(::ClimaComms.CUDADevice, A, b) args = (A, b) auto_launch!( thomas_algorithm_kernel!, - args, - size(Fields.field_values(A)); + args; threads_s = nthreads, blocks_s = nblocks, ) diff --git a/ext/cuda/remapping_distributed.jl b/ext/cuda/remapping_distributed.jl index f5fc20183b..70246c47d4 100644 --- a/ext/cuda/remapping_distributed.jl +++ b/ext/cuda/remapping_distributed.jl @@ -29,8 +29,7 @@ function _set_interpolated_values_device!( ) auto_launch!( set_interpolated_values_kernel!, - args, - out; + 
args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -163,8 +162,7 @@ function _set_interpolated_values_device!( ) auto_launch!( set_interpolated_values_kernel!, - args, - out; + args; threads_s = (nthreads), blocks_s = (nblocks), ) diff --git a/ext/cuda/remapping_interpolate_array.jl b/ext/cuda/remapping_interpolate_array.jl index 88d3234913..d96862c679 100644 --- a/ext/cuda/remapping_interpolate_array.jl +++ b/ext/cuda/remapping_interpolate_array.jl @@ -22,8 +22,7 @@ function interpolate_slab!( args = (output_cuarray, field, cuslab_indices, cuweights) auto_launch!( interpolate_slab_kernel!, - args, - output_cuarray; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -107,8 +106,7 @@ function interpolate_slab_level!( args = (output_cuarray, field, cuvidx_ref_coordinates, h, Is) auto_launch!( interpolate_slab_level_kernel!, - args, - out; + args; threads_s = (nthreads), blocks_s = (nblocks), ) diff --git a/ext/cuda/topologies_dss.jl b/ext/cuda/topologies_dss.jl index ac4a08b722..062917cde2 100644 --- a/ext/cuda/topologies_dss.jl +++ b/ext/cuda/topologies_dss.jl @@ -20,16 +20,13 @@ function Topologies.dss_load_perimeter_data!( data::Union{DataLayouts.IJFH, DataLayouts.VIJFH}, perimeter::Topologies.Perimeter2D, ) - pperimeter_data = parent(dss_buffer.perimeter_data) - pdata = parent(data) - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) - nitems = nlevels * nperimeter * nfid * nelems + (; perimeter_data) = dss_buffer + nitems = prod(DataLayouts.farray_size(perimeter_data)) nthreads, nblocks = _configure_threadblock(nitems) - args = (pperimeter_data, pdata, perimeter) + args = (perimeter_data, data, perimeter) auto_launch!( dss_load_perimeter_data_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -37,13 +34,16 @@ function Topologies.dss_load_perimeter_data!( end function dss_load_perimeter_data_kernel!( - pperimeter_data::AbstractArray{FT, 4}, - pdata::Union{AbstractArray{FT, 4}, AbstractArray{FT, 
5}}, + perimeter_data::DataLayouts.AbstractData, + data::Union{DataLayouts.IJFH, DataLayouts.VIJFH}, perimeter::Topologies.Perimeter2D{Nq}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - (nlevels, _, nfidx, nelems) = sizep = size(pperimeter_data) # size of perimeter data array +) where {Nq} + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + (nlevels, _, nfidx, nelems) = + sizep = DataLayouts.farray_size(perimeter_data) # size of perimeter data array sized = (nlevels, Nq, Nq, nfidx, nelems) # size of data + pperimeter_data = parent(perimeter_data) + pdata = parent(data) if gidx ≤ prod(sizep) (level, p, fidx, elem) = cart_ind(sizep, gidx).I @@ -60,16 +60,13 @@ function Topologies.dss_unload_perimeter_data!( dss_buffer::Topologies.DSSBuffer, perimeter, ) - pperimeter_data = parent(dss_buffer.perimeter_data) - pdata = parent(data) - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) - nitems = nlevels * nperimeter * nfid * nelems + (; perimeter_data) = dss_buffer + nitems = prod(DataLayouts.farray_size(perimeter_data)) nthreads, nblocks = _configure_threadblock(nitems) - args = (pdata, pperimeter_data, perimeter) + args = (data, perimeter_data, perimeter) auto_launch!( dss_unload_perimeter_data_kernel!, - args, - pdata; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -77,13 +74,16 @@ function Topologies.dss_unload_perimeter_data!( end function dss_unload_perimeter_data_kernel!( - pdata::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, - pperimeter_data::AbstractArray{FT, 4}, + data::Union{DataLayouts.IJFH, DataLayouts.VIJFH}, + perimeter_data::AbstractData, perimeter::Topologies.Perimeter2D{Nq}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - (nlevels, nperimeter, nfidx, nelems) = sizep = size(pperimeter_data) # size of perimeter data array +) where {Nq} + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + (nlevels, 
nperimeter, nfidx, nelems) = + sizep = DataLayouts.farray_size(perimeter_data) # size of perimeter data array sized = (nlevels, Nq, Nq, nfidx, nelems) # size of data + pperimeter_data = parent(perimeter_data) + pdata = parent(data) if gidx ≤ prod(sizep) (level, p, fidx, elem) = cart_ind(sizep, gidx).I @@ -103,13 +103,13 @@ function Topologies.dss_local!( nlocalvertices = length(topology.local_vertex_offset) - 1 nlocalfaces = length(topology.interior_faces) if (nlocalvertices + nlocalfaces) > 0 - pperimeter_data = parent(perimeter_data) - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) + (nlevels, nperimeter, nfid, nelems) = + DataLayouts.farray_size(perimeter_data) nitems = nlevels * nfid * (nlocalfaces + nlocalvertices) nthreads, nblocks = _configure_threadblock(nitems) args = ( - pperimeter_data, + perimeter_data, topology.local_vertices, topology.local_vertex_offset, topology.interior_faces, @@ -117,8 +117,7 @@ function Topologies.dss_local!( ) auto_launch!( dss_local_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -127,16 +126,19 @@ function Topologies.dss_local!( end function dss_local_kernel!( - pperimeter_data::AbstractArray{FT, 4}, + perimeter_data::DataLayouts.VIFH, local_vertices::AbstractVector{Tuple{Int, Int}}, local_vertex_offset::AbstractVector{Int}, interior_faces::AbstractVector{Tuple{Int, Int, Int, Int, Bool}}, perimeter::Topologies.Perimeter2D{Nq}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x +) where {Nq} + FT = eltype(parent(perimeter_data)) + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x nlocalvertices = length(local_vertex_offset) - 1 nlocalfaces = length(interior_faces) - (nlevels, nperimeter, nfidx, _) = size(pperimeter_data) + pperimeter_data = parent(perimeter_data) + FT = eltype(pperimeter_data) + (nlevels, nperimeter, nfidx, _) = DataLayouts.farray_size(perimeter_data) if gidx ≤ nlevels * nfidx * nlocalvertices # 
local vertices sizev = (nlevels, nfidx, nlocalvertices) (level, fidx, vertexid) = cart_ind(sizev, gidx).I @@ -200,15 +202,15 @@ function Topologies.dss_transform!( p∂ξ∂x = parent(∂ξ∂x) pperimeter_data = parent(perimeter_data) nmetric = cld(length(p∂ξ∂x), prod(size(∂ξ∂x))) - (nlevels, nperimeter, _, _) = size(pperimeter_data) + (nlevels, nperimeter, _, _) = DataLayouts.array_size(perimeter_data) nitems = nlevels * nperimeter * nlocalelems nthreads, nblocks = _configure_threadblock(nitems) args = ( - pperimeter_data, + perimeter_data, pdata, p∂ξ∂x, p∂x∂ξ, - nmetric, + Val(nmetric), pweight, perimeter, scalarfidx, @@ -217,11 +219,11 @@ function Topologies.dss_transform!( covariant123fidx, contravariant123fidx, localelems, + Val(nlocalelems), ) auto_launch!( dss_transform_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -230,11 +232,11 @@ function Topologies.dss_transform!( end function dss_transform_kernel!( - pperimeter_data::AbstractArray{FT, 4}, + perimeter_data::DataLayouts.VIFH, pdata::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, p∂ξ∂x::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, p∂x∂ξ::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, - nmetric::Int, + ::Val{nmetric}, pweight::AbstractArray{FT, 4}, perimeter::Topologies.Perimeter2D{Nq}, scalarfidx::AbstractVector{Int}, @@ -243,10 +245,12 @@ function dss_transform_kernel!( covariant123fidx::AbstractVector{Int}, contravariant123fidx::AbstractVector{Int}, localelems::AbstractVector{Int}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) - nlocalelems = length(localelems) + ::Val{nlocalelems}, +) where {FT <: AbstractFloat, Nq, nmetric, nlocalelems} + pperimeter_data = parent(perimeter_data) + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + (nlevels, nperimeter, nfid, nelems) = + DataLayouts.farray_size(perimeter_data) if gidx ≤ 
nlevels * nperimeter * nlocalelems sizet = (nlevels, nperimeter, nlocalelems) sizet_data = (nlevels, Nq, Nq, nfid, nelems) @@ -393,12 +397,11 @@ function Topologies.dss_untransform!( p∂x∂ξ = parent(∂x∂ξ) p∂ξ∂x = parent(∂ξ∂x) nmetric = cld(length(p∂ξ∂x), prod(size(∂ξ∂x))) - pperimeter_data = parent(perimeter_data) - (nlevels, nperimeter, _, _) = size(pperimeter_data) + (nlevels, nperimeter, _, _) = DataLayouts.array_size(perimeter_data) nitems = nlevels * nperimeter * nlocalelems nthreads, nblocks = _configure_threadblock(nitems) args = ( - pperimeter_data, + perimeter_data, pdata, p∂ξ∂x, p∂x∂ξ, @@ -410,11 +413,11 @@ function Topologies.dss_untransform!( covariant123fidx, contravariant123fidx, localelems, + Val(nlocalelems), ) auto_launch!( dss_untransform_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -423,7 +426,7 @@ function Topologies.dss_untransform!( end function dss_untransform_kernel!( - pperimeter_data::AbstractArray{FT, 4}, + perimeter_data::DataLayouts.VIFH, pdata::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, p∂ξ∂x::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, p∂x∂ξ::Union{AbstractArray{FT, 4}, AbstractArray{FT, 5}}, @@ -435,10 +438,12 @@ function dss_untransform_kernel!( covariant123fidx::AbstractVector{Int}, contravariant123fidx::AbstractVector{Int}, localelems::AbstractVector{Int}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) - nlocalelems = length(localelems) + ::Val{nlocalelems}, +) where {FT <: AbstractFloat, Nq, nlocalelems} + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + (nlevels, nperimeter, nfid, nelems) = + DataLayouts.farray_size(perimeter_data) + pperimeter_data = parent(perimeter_data) if gidx ≤ nlevels * nperimeter * nlocalelems sizet = (nlevels, nperimeter, nlocalelems) sizet_data = (nlevels, Nq, Nq, nfid, nelems) @@ -533,21 +538,20 @@ function 
Topologies.dss_local_ghost!( ) nghostvertices = length(topology.ghost_vertex_offset) - 1 if nghostvertices > 0 - pperimeter_data = parent(perimeter_data) - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) + (nlevels, nperimeter, nfid, nelems) = + DataLayouts.farray_size(perimeter_data) max_threads = 256 nitems = nlevels * nfid * nghostvertices nthreads, nblocks = _configure_threadblock(nitems) args = ( - pperimeter_data, + perimeter_data, topology.ghost_vertices, topology.ghost_vertex_offset, perimeter, ) auto_launch!( dss_local_ghost_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -556,13 +560,15 @@ function Topologies.dss_local_ghost!( end function dss_local_ghost_kernel!( - pperimeter_data::AbstractArray{FT, 4}, + perimeter_data::DataLayouts.VIFH, ghost_vertices, ghost_vertex_offset, perimeter::Topologies.Perimeter2D{Nq}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - (nlevels, nperimeter, nfidx, _) = size(pperimeter_data) +) where {Nq} + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + pperimeter_data = parent(perimeter_data) + FT = eltype(pperimeter_data) + (nlevels, nperimeter, nfidx, _) = DataLayouts.farray_size(perimeter_data) nghostvertices = length(ghost_vertex_offset) - 1 if gidx ≤ nlevels * nfidx * nghostvertices sizev = (nlevels, nfidx, nghostvertices) @@ -594,17 +600,16 @@ function Topologies.fill_send_buffer!( synchronize = true, ) (; perimeter_data, send_buf_idx, send_data) = dss_buffer - pperimeter_data = parent(perimeter_data) - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) + (nlevels, nperimeter, nfid, nelems) = + DataLayouts.farray_size(perimeter_data) nsend = size(send_buf_idx, 1) if nsend > 0 nitems = nsend * nlevels * nfid nthreads, nblocks = _configure_threadblock(nitems) - args = (send_data, send_buf_idx, pperimeter_data) + args = (send_data, send_buf_idx, perimeter_data, Val(nsend)) auto_launch!( 
fill_send_buffer_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -618,11 +623,12 @@ end function fill_send_buffer_kernel!( send_data::AbstractArray{FT, 1}, send_buf_idx::AbstractArray{I, 2}, - pperimeter_data::AbstractArray{FT, 4}, -) where {FT <: AbstractFloat, I <: Int} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - (nlevels, _, nfid, nelems) = size(pperimeter_data) - nsend = size(send_buf_idx, 1) + perimeter_data::AbstractData, + ::Val{nsend}, +) where {FT <: AbstractFloat, I <: Int, nsend} + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + (nlevels, _, nfid, nelems) = DataLayouts.farray_size(perimeter_data) + pperimeter_data = parent(perimeter_data) #sizet = (nsend, nlevels, nfid) sizet = (nlevels, nfid, nsend) #if gidx ≤ nsend * nlevels * nfid @@ -642,17 +648,16 @@ function Topologies.load_from_recv_buffer!( dss_buffer::Topologies.DSSBuffer, ) (; perimeter_data, recv_buf_idx, recv_data) = dss_buffer - pperimeter_data = parent(perimeter_data) - (nlevels, nperimeter, nfid, nelems) = size(pperimeter_data) + (nlevels, nperimeter, nfid, nelems) = + DataLayouts.farray_size(perimeter_data) nrecv = size(recv_buf_idx, 1) if nrecv > 0 nitems = nrecv * nlevels * nfid nthreads, nblocks = _configure_threadblock(nitems) - args = (pperimeter_data, recv_data, recv_buf_idx) + args = (perimeter_data, recv_data, recv_buf_idx, Val(nrecv)) auto_launch!( load_from_recv_buffer_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -661,13 +666,14 @@ function Topologies.load_from_recv_buffer!( end function load_from_recv_buffer_kernel!( - pperimeter_data::AbstractArray{FT, 4}, + perimeter_data::AbstractData, recv_data::AbstractArray{FT, 1}, recv_buf_idx::AbstractArray{I, 2}, -) where {FT <: AbstractFloat, I <: Int} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - nlevels, _, nfid, nelems = size(pperimeter_data) - nrecv = size(recv_buf_idx, 1) + 
::Val{nrecv}, +) where {FT <: AbstractFloat, I <: Int, nrecv} + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + pperimeter_data = parent(perimeter_data) + (nlevels, _, nfid, nelems) = DataLayouts.farray_size(perimeter_data) #sizet = (nrecv, nlevels, nfid) sizet = (nlevels, nfid, nrecv) #if gidx ≤ nrecv * nlevels * nfid @@ -691,12 +697,11 @@ function Topologies.dss_ghost!( ) nghostvertices = length(topology.ghost_vertex_offset) - 1 if nghostvertices > 0 - pperimeter_data = parent(perimeter_data) - nlevels, _, nfidx, _ = size(pperimeter_data) + (nlevels, _, nfidx, _) = DataLayouts.farray_size(perimeter_data) nitems = nlevels * nfidx * nghostvertices nthreads, nblocks = _configure_threadblock(nitems) args = ( - pperimeter_data, + perimeter_data, topology.ghost_vertices, topology.ghost_vertex_offset, topology.repr_ghost_vertex, @@ -704,8 +709,7 @@ function Topologies.dss_ghost!( ) auto_launch!( dss_ghost_kernel!, - args, - pperimeter_data; + args; threads_s = (nthreads), blocks_s = (nblocks), ) @@ -714,14 +718,16 @@ function Topologies.dss_ghost!( end function dss_ghost_kernel!( - pperimeter_data::AbstractArray{FT, 4}, + perimeter_data::AbstractData, ghost_vertices, ghost_vertex_offset, repr_ghost_vertex, perimeter::Topologies.Perimeter2D{Nq}, -) where {FT <: AbstractFloat, Nq} - gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - nlevels, _, nfidx, _ = size(pperimeter_data) +) where {Nq} + pperimeter_data = parent(perimeter_data) + FT = eltype(pperimeter_data) + gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x + (nlevels, _, nfidx, _) = DataLayouts.farray_size(perimeter_data) nghostvertices = length(ghost_vertex_offset) - 1 if gidx ≤ nlevels * nfidx * nghostvertices diff --git a/src/DataLayouts/DataLayouts.jl b/src/DataLayouts/DataLayouts.jl index aefeae837e..97c58d46e5 100644 --- a/src/DataLayouts/DataLayouts.jl +++ b/src/DataLayouts/DataLayouts.jl @@ -72,14 +72,14 @@ end @inline array_length(data::AbstractData) = 
prod(size(parent(data))) """ - (Ni, Nj, Nv, _, Nh) = universal_size(data::AbstractData) + (Ni, Nj, _, Nv, Nh) = universal_size(data::AbstractData) A tuple of compile-time known type parameters, corresponding to `UniversalSize`. The field dimension is excluded and is returned as 1. """ @inline universal_size(::UniversalSize{Ni, Nj, Nv, Nh}) where {Ni, Nj, Nv, Nh} = - (Ni, Nj, Nv, 1, Nh) + (Ni, Nj, 1, Nv, Nh) """ get_N(::AbstractData) @@ -116,8 +116,6 @@ Statically returns `Nh`. @inline get_Nv(data::AbstractData) = get_Nv(UniversalSize(data)) @inline get_N(data::AbstractData) = get_N(UniversalSize(data)) -@inline universal_size(data::AbstractData) = universal_size(UniversalSize(data)) - function Base.show(io::IO, data::AbstractData) indent_width = 2 (rows, cols) = displaysize(io) @@ -1276,6 +1274,51 @@ type parameters. @inline union_all(::Type{<:IH1JH2}) = IH1JH2 @inline union_all(::Type{<:IV1JH2}) = IV1JH2 +""" + array_size(data::AbstractData, [dim]) + array_size(::Type{<:AbstractData}, [dim]) + +This is an internal function; please do not use it outside of ClimaCore. + +Returns the size of the backing array, with the field dimension set to 1. + +This function is helpful for writing generic +code, when reconstructing new datalayouts with new +type parameters. 
+""" +@inline array_size(data::AbstractData, i::Integer) = array_size(data)[i] +@inline array_size(::IJKFVH{S, Nij, Nk, Nv, Nh}) where {S, Nij, Nk, Nv, Nh} = (Nij, Nij, Nk, 1, Nv, Nh) +@inline array_size(::IJFH{S, Nij, Nh}) where {S, Nij, Nh} = (Nij, Nij, 1, Nh) +@inline array_size(::IFH{S, Ni, Nh}) where {S, Ni, Nh} = (Ni, 1, Nh) +@inline array_size(::DataF{S}) where {S} = (1,) +@inline array_size(::IJF{S, Nij}) where {S, Nij} = (Nij, Nij, 1) +@inline array_size(::IF{S, Ni}) where {S, Ni} = (Ni, 1) +@inline array_size(::VF{S, Nv}) where {S, Nv} = (Nv, 1) +@inline array_size(::VIJFH{S, Nv, Nij, Nh}) where {S, Nv, Nij, Nh} = (Nv, Nij, Nij, 1, Nh) +@inline array_size(::VIFH{S, Nv, Ni, Nh}) where {S, Nv, Ni, Nh} = (Nv, Ni, 1, Nh) + +""" + farray_size(data::AbstractData, [dim]) + +This is an internal function; please do not use it outside of ClimaCore. + +Returns the size of the backing array, including the field dimension. + +This function is helpful for writing generic +code, when reconstructing new datalayouts with new +type parameters. 
+""" +@inline farray_size(data::AbstractData, i::Integer) = farray_size(data)[i] +@inline farray_size(data::IJKFVH{S, Nij, Nk, Nv, Nh}) where {S, Nij, Nk, Nv, Nh} = (Nij, Nij, Nk, ncomponents(data), Nv, Nh) +@inline farray_size(data::IJFH{S, Nij, Nh}) where {S, Nij, Nh} = (Nij, Nij, ncomponents(data), Nh) +@inline farray_size(data::IFH{S, Ni, Nh}) where {S, Ni, Nh} = (Ni, ncomponents(data), Nh) +@inline farray_size(data::DataF{S}) where {S} = (ncomponents(data),) +@inline farray_size(data::IJF{S, Nij}) where {S, Nij} = (Nij, Nij, ncomponents(data)) +@inline farray_size(data::IF{S, Ni}) where {S, Ni} = (Ni, ncomponents(data)) +@inline farray_size(data::VF{S, Nv}) where {S, Nv} = (Nv, ncomponents(data)) +@inline farray_size(data::VIJFH{S, Nv, Nij, Nh}) where {S, Nv, Nij, Nh} = (Nv, Nij, Nij, ncomponents(data), Nh) +@inline farray_size(data::VIFH{S, Nv, Ni, Nh}) where {S, Nv, Ni, Nh} = (Nv, Ni, ncomponents(data), Nh) + @inline slab_index(i, j) = CartesianIndex(i, j, 1, 1, 1) @inline slab_index(i) = CartesianIndex(i, 1, 1, 1, 1) @inline vindex(v) = CartesianIndex(1, 1, 1, v, 1) diff --git a/src/Topologies/dss.jl b/src/Topologies/dss.jl index 4432ffc0b6..31056ed87f 100644 --- a/src/Topologies/dss.jl +++ b/src/Topologies/dss.jl @@ -66,9 +66,7 @@ function create_dss_buffer( convert_to_array = DA isa Array ? false : true (_, _, _, Nv, Nh) = Base.size(data) Np = length(perimeter) - Nf = - length(parent(data)) == 0 ? 
0 : - cld(length(parent(data)), (Nij * Nij * Nv * Nh)) + Nf = DataLayouts.ncomponents(data) nfacedof = Nij - 2 T = eltype(parent(data)) TS = _transformed_type(data, local_geometry, local_weights, DA) # extract transformed type @@ -941,7 +939,7 @@ function fill_send_buffer!( ) (; perimeter_data, send_buf_idx, send_data) = dss_buffer (Np, _, _, Nv, nelems) = size(perimeter_data) - Nf = cld(length(parent(perimeter_data)), (Nv * Np * nelems)) + Nf = DataLayouts.ncomponents(perimeter_data) pdata = parent(perimeter_data) nsend = size(send_buf_idx, 1) ctr = 1 @@ -970,7 +968,7 @@ function load_from_recv_buffer!( ) (; perimeter_data, recv_buf_idx, recv_data) = dss_buffer (Np, _, _, Nv, nelems) = size(perimeter_data) - Nf = cld(length(parent(perimeter_data)), (Nv * Np * nelems)) + Nf = DataLayouts.ncomponents(perimeter_data) pdata = parent(perimeter_data) nrecv = size(recv_buf_idx, 1) ctr = 1 diff --git a/src/Topologies/dss_transform.jl b/src/Topologies/dss_transform.jl index 1fec9d587a..16403b67d0 100644 --- a/src/Topologies/dss_transform.jl +++ b/src/Topologies/dss_transform.jl @@ -285,7 +285,7 @@ function create_ghost_buffer( ) k = stride(parent(send_data), 4) else - Nv, _, _, Nf, _ = size(parent(data)) + Nv, _, _, Nf, _ = DataLayouts.farray_size(data) send_data = DataLayouts.VIJFH{S, Nv, Nij, Topologies.nsendelems(topology)}( similar( diff --git a/test/DataLayouts/unit_copyto.jl b/test/DataLayouts/unit_copyto.jl index 0b304a4f81..1cf917fd1b 100644 --- a/test/DataLayouts/unit_copyto.jl +++ b/test/DataLayouts/unit_copyto.jl @@ -17,7 +17,7 @@ function test_copyto_float!(data) rand_data = DataLayouts.rebuild(data, similar(parent(data))) ArrayType = ClimaComms.array_type(ClimaComms.device()) parent(rand_data) .= - ArrayType(rand(eltype(parent(data)), size(parent(data)))) + ArrayType(rand(eltype(parent(data)), DataLayouts.farray_size(data))) Base.copyto!(data, rand_data) # test copyto!(::AbstractData, ::AbstractData) @test all(parent(data) .== parent(rand_data)) 
Base.copyto!(data, Base.Broadcast.broadcasted(+, rand_data, 1)) # test copyto!(::AbstractData, ::Broadcasted) @@ -30,7 +30,7 @@ function test_copyto!(data) rand_data = DataLayouts.rebuild(data, similar(parent(data))) ArrayType = ClimaComms.array_type(ClimaComms.device()) parent(rand_data) .= - ArrayType(rand(eltype(parent(data)), size(parent(data)))) + ArrayType(rand(eltype(parent(data)), DataLayouts.farray_size(data))) Base.copyto!(data, rand_data) # test copyto!(::AbstractData, ::AbstractData) @test all(parent(data.:1) .== parent(rand_data.:1)) @test all(parent(data.:2) .== parent(rand_data.:2)) @@ -98,7 +98,7 @@ end SubArray( parent(data), ntuple( - i -> Base.Slice(Base.OneTo(size(parent(data), i))), + i -> Base.Slice(Base.OneTo(DataLayouts.farray_size(data, i))), ndims(data), ), ), diff --git a/test/DataLayouts/unit_fill.jl b/test/DataLayouts/unit_fill.jl index fc803c0015..f8af1f022c 100644 --- a/test/DataLayouts/unit_fill.jl +++ b/test/DataLayouts/unit_fill.jl @@ -73,7 +73,10 @@ end data, SubArray( parent(data), - ntuple(i -> Base.OneTo(size(parent(data), i)), ndims(data)), + ntuple( + i -> Base.OneTo(DataLayouts.farray_size(data, i)), + ndims(data), + ), ), ) FT = Float64 @@ -119,7 +122,10 @@ end data, SubArray( parent(rdata), - ntuple(i -> Base.OneTo(size(parent(rdata), i)), ndims(rdata)), + ntuple( + i -> Base.OneTo(DataLayouts.farray_size(rdata, i)), + ndims(rdata), + ), ), ) rarray = parent(parent(newdata)) diff --git a/test/DataLayouts/unit_mapreduce.jl b/test/DataLayouts/unit_mapreduce.jl index 2da4547521..dcbf0a99a0 100644 --- a/test/DataLayouts/unit_mapreduce.jl +++ b/test/DataLayouts/unit_mapreduce.jl @@ -24,7 +24,8 @@ function test_mapreduce_1!(context, data) Random.seed!(1234) device = ClimaComms.device(context) ArrayType = ClimaComms.array_type(device) - rand_data = ArrayType(rand(eltype(parent(data)), size(parent(data)))) + rand_data = + ArrayType(rand(eltype(parent(data)), DataLayouts.farray_size(data))) parent(data) .= rand_data if device isa 
ClimaComms.CUDADevice @test wrapper(context, identity, min, data) == minimum(parent(data)) @@ -40,7 +41,8 @@ function test_mapreduce_2!(context, data) Random.seed!(1234) device = ClimaComms.device(context) ArrayType = ClimaComms.array_type(device) - rand_data = ArrayType(rand(eltype(parent(data)), size(parent(data)))) + rand_data = + ArrayType(rand(eltype(parent(data)), DataLayouts.farray_size(data))) parent(data) .= rand_data # mapreduce orders tuples lexicographically: # minimum(((2,3), (1,4))) # (1, 4) @@ -116,7 +118,10 @@ end data, SubArray( parent(data), - ntuple(i -> Base.OneTo(size(parent(data), i)), ndims(data)), + ntuple( + i -> Base.OneTo(DataLayouts.farray_size(data, i)), + ndims(data), + ), ), ) FT = Float64 diff --git a/test/Operators/spectralelement/benchmark_utils.jl b/test/Operators/spectralelement/benchmark_utils.jl index 4415f42cfe..fb82644196 100644 --- a/test/Operators/spectralelement/benchmark_utils.jl +++ b/test/Operators/spectralelement/benchmark_utils.jl @@ -6,6 +6,7 @@ using LinearAlgebra: × import PrettyTables import LinearAlgebra as LA import OrderedCollections +import ClimaCore.DataLayouts import ClimaCore.Operators as Operators import ClimaCore.Domains as Domains import ClimaCore.Meshes as Meshes @@ -229,7 +230,7 @@ function setup_kernel_args(ARGS::Vector{String} = ARGS) f_comp2_buffer = Spaces.create_dss_buffer(f_comp2) f = @. Geometry.Contravariant3Vector(Geometry.WVector(ϕ)) - s = size(parent(ϕ)) + s = DataLayouts.farray_size(Fields.field_values(ϕ)) ArrayType = ClimaComms.array_type(device) ϕ_arr = ArrayType(fill(FT(1), s)) ψ_arr = ArrayType(fill(FT(2), s)) diff --git a/test/Spaces/distributed_cuda/ddss2.jl b/test/Spaces/distributed_cuda/ddss2.jl index d138034d57..32e5d53a56 100644 --- a/test/Spaces/distributed_cuda/ddss2.jl +++ b/test/Spaces/distributed_cuda/ddss2.jl @@ -108,7 +108,7 @@ pid, nprocs = ClimaComms.init(context) end #! 
format: on p = @allocated Spaces.weighted_dss!(y0, dss_buffer) - iamroot && @test p ≤ 8064 + iamroot && @test p ≤ 8832 #testing weighted dss on a vector field init_vectorstate(local_geometry, p) = Geometry.Covariant12Vector(1.0, -1.0) diff --git a/test/Spaces/distributed_cuda/ddss4.jl b/test/Spaces/distributed_cuda/ddss4.jl index d129a6f263..d127bdbf79 100644 --- a/test/Spaces/distributed_cuda/ddss4.jl +++ b/test/Spaces/distributed_cuda/ddss4.jl @@ -100,7 +100,7 @@ pid, nprocs = ClimaComms.init(context) end p = @allocated Spaces.weighted_dss!(y0, dss_buffer) if pid == 1 - @test p ≤ 7008 + @test p ≤ 7776 end end diff --git a/test/Spaces/opt_spaces.jl b/test/Spaces/opt_spaces.jl index a6de4e6f81..cbf7a7938b 100644 --- a/test/Spaces/opt_spaces.jl +++ b/test/Spaces/opt_spaces.jl @@ -35,19 +35,19 @@ end if ClimaComms.device(context) isa ClimaComms.CUDADevice test_n_failures(86, TU.PointSpace, context) test_n_failures(144, TU.SpectralElementSpace1D, context) - test_n_failures(1120, TU.SpectralElementSpace2D, context) + test_n_failures(1125, TU.SpectralElementSpace2D, context) test_n_failures(123, TU.ColumnCenterFiniteDifferenceSpace, context) test_n_failures(123, TU.ColumnFaceFiniteDifferenceSpace, context) - test_n_failures(1126, TU.SphereSpectralElementSpace, context) + test_n_failures(1131, TU.SphereSpectralElementSpace, context) test_n_failures(1139, TU.CenterExtrudedFiniteDifferenceSpace, context) test_n_failures(1139, TU.FaceExtrudedFiniteDifferenceSpace, context) else test_n_failures(0, TU.PointSpace, context) test_n_failures(137, TU.SpectralElementSpace1D, context) - test_n_failures(308, TU.SpectralElementSpace2D, context) + test_n_failures(310, TU.SpectralElementSpace2D, context) test_n_failures(118, TU.ColumnCenterFiniteDifferenceSpace, context) test_n_failures(118, TU.ColumnFaceFiniteDifferenceSpace, context) - test_n_failures(314, TU.SphereSpectralElementSpace, context) + test_n_failures(316, TU.SphereSpectralElementSpace, context) test_n_failures(321, 
TU.CenterExtrudedFiniteDifferenceSpace, context) test_n_failures(321, TU.FaceExtrudedFiniteDifferenceSpace, context) diff --git a/test/Spaces/unit_spaces.jl b/test/Spaces/unit_spaces.jl index 2b1596a9ea..3a78fb429c 100644 --- a/test/Spaces/unit_spaces.jl +++ b/test/Spaces/unit_spaces.jl @@ -52,8 +52,7 @@ on_gpu = ClimaComms.device() isa ClimaComms.CUDADevice coord_data = Spaces.coordinates_data(space) @test eltype(coord_data) == Geometry.XPoint{Float64} - array = parent(Spaces.coordinates_data(space)) - @test size(array) == (4, 1, 1) + @test DataLayouts.farray_size(Spaces.coordinates_data(space)) == (4, 1, 1) coord_slab = slab(Spaces.coordinates_data(space), 1) @test coord_slab[slab_index(1)] == Geometry.XPoint{FT}(-3) @test coord_slab[slab_index(4)] == Geometry.XPoint{FT}(5) @@ -112,17 +111,18 @@ on_gpu || @testset "extruded (2d 1×3) finite difference space" begin # Extrusion f_space = Spaces.ExtrudedFiniteDifferenceSpace(hspace, vert_face_space) c_space = Spaces.CenterExtrudedFiniteDifferenceSpace(f_space) - array = parent(Spaces.coordinates_data(c_space)) + s = DataLayouts.farray_size(Spaces.coordinates_data(c_space)) z = Fields.coordinate_field(c_space).z - @test size(array) == (10, 4, 2, 5) # 10V, 4I, 2F(x,z), 5H + @test s == (10, 4, 2, 5) # 10V, 4I, 2F(x,z), 5H @test Spaces.local_geometry_type(typeof(f_space)) <: Geometry.LocalGeometry @test Spaces.local_geometry_type(typeof(c_space)) <: Geometry.LocalGeometry # Define test col index colidx = Fields.ColumnIndex{1}((4,), 5) + z_values = Fields.field_values(z[colidx]) # Here valid `colidx` are `Fields.ColumnIndex{1}((1:4,), 1:5)` - @test size(parent(z[colidx])) == (10, 1) - @test Fields.field_values(z[colidx]) isa DataLayouts.VF + @test DataLayouts.farray_size(z_values) == (10, 1) + @test z_values isa DataLayouts.VF @test Spaces.column(z, 1, 1, 1) isa Fields.Field @test_throws BoundsError Spaces.column(z, 1, 2, 1) @test Spaces.column(z, 1, 2) isa Fields.Field @@ -214,8 +214,7 @@ end quadrature: 4-point 
Gauss-Legendre-Lobatto quadrature""" coord_data = Spaces.coordinates_data(space) - array = parent(coord_data) - @test size(array) == (4, 4, 2, 1) + @test DataLayouts.farray_size(coord_data) == (4, 4, 2, 1) coord_slab = slab(coord_data, 1) @test coord_slab[slab_index(1, 1)] ≈ Geometry.XYPoint{FT}(-3.0, -2.0) @test coord_slab[slab_index(4, 1)] ≈ Geometry.XYPoint{FT}(5.0, -2.0)