From efb0865e28b960eb6012184b14a39e6ef356f0d7 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Wed, 5 Jun 2024 16:50:55 -0400 Subject: [PATCH] Fix dispatching for some cuda kernels Fix Update test filenames, ndims on DataLayouts - fix CI Build latest version in docs Fixes for cuda kernels with empty fields --- .buildkite/pipeline.yml | 31 ++++++ .github/workflows/docs.yml | 2 +- ext/ClimaCoreCUDAExt.jl | 3 + ext/cuda/cuda_utils.jl | 17 +-- ext/cuda/data_layouts.jl | 100 ++---------------- ext/cuda/fill.jl | 30 ++++++ src/DataLayouts/DataLayouts.jl | 109 ++++++++++++++------ src/DataLayouts/broadcast.jl | 24 ++--- src/Fields/Fields.jl | 1 + test/DataLayouts/benchmark_fill.jl | 40 +++++++ test/DataLayouts/unit_fill.jl | 57 ++++++++++ test/DataLayouts/unit_ndims.jl | 32 ++++++ test/Fields/field_multi_broadcast_fusion.jl | 37 +++---- test/runtests.jl | 2 + 14 files changed, 320 insertions(+), 165 deletions(-) create mode 100644 ext/cuda/fill.jl create mode 100644 test/DataLayouts/benchmark_fill.jl create mode 100644 test/DataLayouts/unit_fill.jl create mode 100644 test/DataLayouts/unit_ndims.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 1a6e7756c5..064333d98c 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -77,6 +77,10 @@ steps: key: unit_data0d command: "julia --color=yes --check-bounds=yes --project=.buildkite test/DataLayouts/data0d.jl" + - label: "Unit: data_fill" + key: unit_data_fill + command: "julia --color=yes --check-bounds=yes --project=.buildkite test/DataLayouts/unit_fill.jl" + - label: "Unit: data1d" key: unit_data1d command: "julia --color=yes --check-bounds=yes --project=.buildkite test/DataLayouts/data1d.jl" @@ -103,6 +107,16 @@ steps: agents: slurm_gpus: 1 + - label: "Unit: data fill" + key: gpu_unit_data_fill + command: + - "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'" + - "julia --color=yes --check-bounds=yes --project=.buildkite test/DataLayouts/unit_fill.jl" + env: + 
CLIMACOMMS_DEVICE: "CUDA" + agents: + slurm_gpus: 1 + - group: "Unit: Geometry" steps: @@ -1060,6 +1074,23 @@ steps: key: "cpu_axis_tensor_conversion_perf_bm" command: "julia --color=yes --project=.buildkite test/Geometry/axistensor_conversion_benchmarks.jl" + - group: "Perf: DataLayouts" + steps: + + - label: "Perf: DataLayouts fill" + key: "cpu_datalayouts_fill" + command: "julia --color=yes --project=.buildkite test/DataLayouts/benchmark_fill.jl" + + - label: "Perf: DataLayouts fill" + key: "gpu_datalayouts_fill" + command: + - "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'" + - "julia --color=yes --project=.buildkite test/DataLayouts/benchmark_fill.jl" + env: + CLIMACOMMS_DEVICE: "CUDA" + agents: + slurm_gpus: 1 + - group: "Perf: Fields" steps: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 264bb184cb..f054711f33 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,7 +12,7 @@ jobs: with: version: '1.10' - name: Install dependencies - run: julia --project=docs -e 'using Pkg; Pkg.instantiate(;verbose=true)' + run: julia --project=docs -e 'using Pkg; Pkg.develop(path="."); Pkg.instantiate(;verbose=true)' - name: Build and deploy env: GKSwstype: "100" # headless GR: https://discourse.julialang.org/t/generation-of-documentation-fails-qt-qpa-xcb-could-not-connect-to-display/60988/2 diff --git a/ext/ClimaCoreCUDAExt.jl b/ext/ClimaCoreCUDAExt.jl index 8f1e81bc8c..95dd3aeffe 100644 --- a/ext/ClimaCoreCUDAExt.jl +++ b/ext/ClimaCoreCUDAExt.jl @@ -14,6 +14,9 @@ import ClimaCore.Utilities: half import ClimaCore.RecursiveApply: ⊠, ⊞, ⊟, radd, rmul, rsub, rdiv, rmap, rzero, rmin, rmax +const CuArrayBackedTypes = + Union{CUDA.CuArray, SubArray{<:Any, <:Any, <:CUDA.CuArray}} + include(joinpath("cuda", "cuda_utils.jl")) include(joinpath("cuda", "data_layouts.jl")) include(joinpath("cuda", "fields.jl")) diff --git a/ext/cuda/cuda_utils.jl b/ext/cuda/cuda_utils.jl index ae2006700c..43c7b130e7 100644 ---
a/ext/cuda/cuda_utils.jl +++ b/ext/cuda/cuda_utils.jl @@ -4,8 +4,9 @@ import ClimaCore.DataLayouts import ClimaCore.DataLayouts: empty_kernel_stats get_n_items(field::Fields.Field) = - prod(size(parent(Fields.field_values(field)))) -get_n_items(data::DataLayouts.AbstractData) = prod(size(parent(data))) + prod(DataLayouts.universal_size(Fields.field_values(field))) +get_n_items(data::DataLayouts.AbstractData) = + prod(DataLayouts.universal_size(data)) get_n_items(arr::AbstractArray) = prod(size(parent(arr))) get_n_items(tup::Tuple) = prod(tup) @@ -47,11 +48,13 @@ function auto_launch!( ) where {F!} if auto nitems = get_n_items(data) - kernel = CUDA.@cuda always_inline = true launch = false f!(args...) - config = CUDA.launch_configuration(kernel.fun) - threads = min(nitems, config.threads) - blocks = cld(nitems, threads) - kernel(args...; threads, blocks) # This knows to use always_inline from above. + if nitems > 0 # nothing to launch for empty data; threads would be 0 and cld(0, 0) throws + kernel = CUDA.@cuda always_inline = true launch = false f!(args...) + config = CUDA.launch_configuration(kernel.fun) + threads = min(nitems, config.threads) + blocks = cld(nitems, threads) + kernel(args...; threads, blocks) # This knows to use always_inline from above. + end else kernel = CUDA.@cuda always_inline = always_inline threads = threads_s blocks = diff --git a/ext/cuda/data_layouts.jl b/ext/cuda/data_layouts.jl index 2f8860a136..485cd217ee 100644 --- a/ext/cuda/data_layouts.jl +++ b/ext/cuda/data_layouts.jl @@ -5,15 +5,11 @@ import ClimaCore.DataLayouts: IJKFVH, IJFH, VIJFH, VIFH, IFH, IJF, IF, VF, DataF import ClimaCore.DataLayouts: IJFHStyle, VIJFHStyle, VFStyle, DataFStyle import ClimaCore.DataLayouts: promote_parent_array_type import ClimaCore.DataLayouts: parent_array_type -import ClimaCore.DataLayouts: device_from_array_type, isascalar +import ClimaCore.DataLayouts: isascalar import ClimaCore.DataLayouts: fused_copyto!
import Adapt import CUDA -device_from_array_type(::Type{<:CUDA.CuArray}) = ClimaComms.CUDADevice() -device_from_array_type(::Type{<:SubArray{<:Any, <:Any, <:CUDA.CuArray}}) = - ClimaComms.CUDADevice() - parent_array_type(::Type{<:CUDA.CuArray{T, N, B} where {N}}) where {T, B} = CUDA.CuArray{T, N, B} where {N} @@ -44,24 +40,10 @@ function knl_copyto!(dest, src) return nothing end -function knl_fill!(dest, val) - i = CUDA.threadIdx().x - j = CUDA.threadIdx().y - - h = CUDA.blockIdx().x - v = CUDA.blockDim().z * (CUDA.blockIdx().y - 1) + CUDA.threadIdx().z - - if v <= size(dest, 4) - I = CartesianIndex((i, j, 1, v, h)) - @inbounds dest[I] = val - end - return nothing -end - function Base.copyto!( dest::IJFH{S, Nij}, bc::Union{IJFH{S, Nij, A}, Base.Broadcast.Broadcasted{IJFHStyle{Nij, A}}}, -) where {S, Nij, A <: CUDA.CuArray} +) where {S, Nij, A <: CuArrayBackedTypes} _, _, _, _, Nh = size(bc) if Nh > 0 auto_launch!( @@ -74,28 +56,6 @@ function Base.copyto!( end return dest end -function Base.fill!( - dest::IJFH{S, Nij, A}, - val, -) where { - S, - Nij, - A <: Union{CUDA.CuArray, SubArray{<:Any, <:Any, <:CUDA.CuArray}}, -} - _, _, _, _, Nh = size(dest) - if Nh > 0 - auto_launch!( - knl_fill!, - (dest, val), - dest; - threads_s = (Nij, Nij), - blocks_s = (Nh, 1), - ) - end - return dest -end - - function Base.copyto!( dest::VIJFH{S, Nv, Nij}, @@ -103,7 +63,7 @@ function Base.copyto!( VIJFH{S, Nv, Nij, A}, Base.Broadcast.Broadcasted{VIJFHStyle{Nv, Nij, A}}, }, -) where {S, Nv, Nij, A <: CUDA.CuArray} +) where {S, Nv, Nij, A <: CuArrayBackedTypes} _, _, _, _, Nh = size(bc) if Nv > 0 && Nh > 0 Nv_per_block = min(Nv, fld(256, Nij * Nij)) @@ -118,30 +78,11 @@ function Base.copyto!( end return dest end -function Base.fill!( - dest::VIJFH{S, Nv, Nij, A}, - val, -) where {S, Nv, Nij, A <: CUDA.CuArray} - _, _, _, _, Nh = size(dest) - if Nv > 0 && Nh > 0 - Nv_per_block = min(Nv, fld(256, Nij * Nij)) - Nv_blocks = cld(Nv, Nv_per_block) - auto_launch!( - knl_fill!, - (dest, val), 
- dest; - threads_s = (Nij, Nij, Nv_per_block), - blocks_s = (Nh, Nv_blocks), - ) - end - return dest -end - function Base.copyto!( dest::VF{S, Nv}, bc::Union{VF{S, Nv, A}, Base.Broadcast.Broadcasted{VFStyle{Nv, A}}}, -) where {S, Nv, A <: CUDA.CuArray} +) where {S, Nv, A <: CuArrayBackedTypes} _, _, _, _, Nh = size(dest) if Nv > 0 && Nh > 0 auto_launch!( @@ -154,19 +95,6 @@ function Base.copyto!( end return dest end -function Base.fill!(dest::VF{S, Nv, A}, val) where {S, Nv, A <: CUDA.CuArray} - _, _, _, _, Nh = size(dest) - if Nv > 0 && Nh > 0 - auto_launch!( - knl_fill!, - (dest, val), - dest; - threads_s = (1, 1), - blocks_s = (Nh, Nv), - ) - end - return dest -end function Base.copyto!( dest::DataF{S}, @@ -181,16 +109,8 @@ function Base.copyto!( ) return dest end -function Base.fill!(dest::DataF{S, A}, val) where {S, A <: CUDA.CuArray} - auto_launch!( - knl_fill!, - (dest, val), - dest; - threads_s = (1, 1), - blocks_s = (1, 1), - ) - return dest -end + +include("fill.jl") Base.@propagate_inbounds function rcopyto_at!( pair::Pair{<:AbstractData, <:Any}, @@ -236,7 +156,7 @@ end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::VIJFH{S, Nv, Nij}, + dest1::VIJFH{S, Nv, Nij, <:CuArrayBackedTypes}, ::ClimaComms.CUDADevice, ) where {S, Nv, Nij} _, _, _, _, Nh = size(dest1) @@ -256,7 +176,7 @@ end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::IJFH{S, Nij}, + dest1::IJFH{S, Nij, <:CuArrayBackedTypes}, ::ClimaComms.CUDADevice, ) where {S, Nij} _, _, _, _, Nh = size(dest1) @@ -273,7 +193,7 @@ function fused_copyto!( end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::VF{S, Nv}, + dest1::VF{S, Nv, <:CuArrayBackedTypes}, ::ClimaComms.CUDADevice, ) where {S, Nv} _, _, _, _, Nh = size(dest1) @@ -291,7 +211,7 @@ end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::DataF{S}, + dest1::DataF{S, <:CuArrayBackedTypes}, ::ClimaComms.CUDADevice, ) where {S} auto_launch!( diff --git a/ext/cuda/fill.jl b/ext/cuda/fill.jl new file mode 
100644 index 0000000000..86e8a023e3 --- /dev/null +++ b/ext/cuda/fill.jl @@ -0,0 +1,30 @@ +cartesian_index(::AbstractData, inds) = CartesianIndex(inds) + +function knl_fill_flat!(dest::AbstractData, val) + n = DataLayouts.universal_size(dest) + inds = kernel_indexes(n) + if valid_range(inds, n) + I = cartesian_index(dest, inds) + @inbounds dest[I] = val + end + return nothing +end + +function cuda_fill!(dest::AbstractData, val) + (_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest) + if Nv > 0 && Nh > 0 && Nf > 0 + auto_launch!(knl_fill_flat!, (dest, val), dest; auto = true) + end + return dest +end + +#! format: off +Base.fill!(dest::IJFH{<:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::IFH{<:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::IJF{<:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::IF{<:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::VIFH{<:Any, <:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::VIJFH{<:Any, <:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::VF{<:Any, <:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +Base.fill!(dest::DataF{<:Any, <:CuArrayBackedTypes}, val) = cuda_fill!(dest, val) +#! 
format: on diff --git a/src/DataLayouts/DataLayouts.jl b/src/DataLayouts/DataLayouts.jl index b71c3e0ae8..a5905c6782 100644 --- a/src/DataLayouts/DataLayouts.jl +++ b/src/DataLayouts/DataLayouts.jl @@ -1,6 +1,20 @@ """ ClimaCore.DataLayouts +Defines the following DataLayouts (see individual docs for more info): + - [`IJKFVH`](@ref) + - [`IJFH`](@ref) + - [`IFH`](@ref) + - [`DataF`](@ref) + - [`IJF`](@ref) + - [`IF`](@ref) + - [`VF`](@ref) + - [`VIJFH`](@ref) + - [`VIFH`](@ref) + - [`IH1JH2`](@ref) + - [`IV1JH2`](@ref) + + Notation: - `i,j` are horizontal node indices within an element - `k` is the vertical node index within an element @@ -29,6 +43,11 @@ abstract type AbstractData{S} end Base.size(data::AbstractData, i::Integer) = size(data)[i] +function universal_size(data::AbstractData) + s = size(data) + return (s[1], s[2], ncomponents(data), s[4], s[5]) +end + function Base.show(io::IO, data::AbstractData) indent_width = 2 (rows, cols) = displaysize(io) @@ -182,10 +201,17 @@ end # Data3D DataLayout # ================== +""" + IJKFVH{S, Nij, Nk}(array::AbstractArray{T, 6}) <: Data3D{S, Nij, Nk} + +A 3D DataLayout. TODO: Add more docs +""" struct IJKFVH{S, Nij, Nk, A} <: Data3D{S, Nij, Nk} array::A end +parent_array_type(::Type{IJKFVH{S, Nij, Nk, A}}) where {S, Nij, Nk, A} = A + function IJKFVH{S, Nij, Nk}(array::AbstractArray{T, 6}) where {S, Nij, Nk, T} @assert size(array, 1) == Nij @assert size(array, 2) == Nij @@ -244,16 +270,24 @@ end """ IJFH{S, Nij, A} <: Data2D{S, Nij} + IJFH{S,Nij}(ArrayType, nelements) + Backing `DataLayout` for 2D spectral element slabs. Element nodal point (I,J) data is contiguous for each datatype `S` struct field (F), for each 2D mesh element slab (H). + +The `ArrayType`-constructor constructs a IJFH 2D Spectral +DataLayout given the backing `ArrayType`, quadrature degrees +of freedom `Nij × Nij`, and the number of mesh elements `nelements`. 
""" struct IJFH{S, Nij, A} <: Data2D{S, Nij} array::A end +parent_array_type(::Type{IJFH{S, Nij, A}}) where {S, Nij, A} = A + function IJFH{S, Nij}(array::AbstractArray{T, 4}) where {S, Nij, T} @assert size(array, 1) == Nij @assert size(array, 2) == Nij @@ -282,13 +316,6 @@ function Base.fill!(data::IJFH, val) end -""" - IJFH{S,Nij}(ArrayType, nelements) - -Construct a IJFH 2D Spectral DataLayout given the backing `ArrayType`, -quadrature degrees of freedom `Nij × Nij` , -and the number of mesh elements `nelements`. -""" function IJFH{S, Nij}(ArrayType, nelements) where {S, Nij} T = eltype(ArrayType) IJFH{S, Nij}(ArrayType(undef, Nij, Nij, typesize(T, S), nelements)) @@ -401,16 +428,25 @@ Base.length(data::Data1D) = size(parent(data), 3) """ IFH{S, Ni, A} <: Data1D{S, Ni} + IFH{S,Ni}(ArrayType, nelements) Backing `DataLayout` for 1D spectral element slabs. -Element nodal point (I) data is contiguous for each datatype `S` struct field (F), -for each 1D mesh element (H). +Element nodal point (I) data is contiguous for each +datatype `S` struct field (F), for each 1D mesh element (H). + + +The `ArrayType`-constructor makes a IFH 1D Spectral +DataLayout given the backing `ArrayType`, quadrature +degrees of freedom `Ni`, and the number of mesh elements +`nelements`. """ struct IFH{S, Ni, A} <: Data1D{S, Ni} array::A end +parent_array_type(::Type{IFH{S, Ni, A}}) where {S, Ni, A} = A + function IFH{S, Ni}(array::AbstractArray{T, 3}) where {S, Ni, T} @assert size(array, 1) == Ni check_basetype(T, S) @@ -424,13 +460,6 @@ function replace_basetype(data::IFH{S, Ni}, ::Type{T}) where {S, Ni, T} return IFH{S′, Ni}(similar(array, T)) end -""" - IFH{S,Ni}(ArrayType, nelements) - -Construct a IFH 1D Spectral DataLayout given the backing `ArrayType`, -quadrature degrees of freedom `Ni` , -and the number of mesh elements `nelements`. 
-""" function IFH{S, Ni}(ArrayType, nelements) where {S, Ni} T = eltype(ArrayType) IFH{S, Ni}(ArrayType(undef, Ni, typesize(T, S), nelements)) @@ -507,7 +536,7 @@ end @inbounds set_struct!( parent(data), convert(S, val), - Val(3), + Val(2), CartesianIndex(i, 1, h), ) end @@ -520,7 +549,7 @@ end @inbounds set_struct!( parent(data), convert(S, val), - Val(3), + Val(2), CartesianIndex(i, 1, h), ) end @@ -541,12 +570,15 @@ struct DataF{S, A} <: Data0D{S} array::A end +parent_array_type(::Type{DataF{S, A}}) where {S, A} = A + function DataF{S}(array::AbstractVector{T}) where {S, T} check_basetype(T, S) + @assert size(array, 1) == typesize(T, S) DataF{S, typeof(array)}(array) end -function DataF{S}(ArrayType) where {S} +function DataF{S}(::Type{ArrayType}) where {S, ArrayType} T = eltype(ArrayType) DataF{S}(ArrayType(undef, typesize(T, S))) end @@ -664,6 +696,8 @@ struct IJF{S, Nij, A} <: DataSlab2D{S, Nij} array::A end +parent_array_type(::Type{IJF{S, Nij, A}}) where {S, Nij, A} = A + function IJF{S, Nij}(array::AbstractArray{T, 3}) where {S, Nij, T} @assert size(array, 1) == Nij @assert size(array, 2) == Nij @@ -804,6 +838,8 @@ struct IF{S, Ni, A} <: DataSlab1D{S, Ni} array::A end +parent_array_type(::Type{IF{S, Ni, A}}) where {S, Ni, A} = A + function IF{S, Ni}(array::AbstractArray{T, 2}) where {S, Ni, T} @assert size(array, 1) == Ni check_basetype(T, S) @@ -903,6 +939,8 @@ struct VF{S, Nv, A} <: DataColumn{S, Nv} array::A end +parent_array_type(::Type{VF{S, Nv, A}}) where {S, Nv, A} = A + function VF{S, Nv}(array::AbstractArray{T, 2}) where {S, Nv, T} check_basetype(T, S) @assert size(array, 1) == Nv @@ -1024,6 +1062,8 @@ struct VIJFH{S, Nv, Nij, A} <: Data2DX{S, Nv, Nij} array::A end +parent_array_type(::Type{VIJFH{S, Nv, Nij, A}}) where {S, Nv, Nij, A} = A + function VIJFH{S, Nv, Nij}(array::AbstractArray{T, 5}) where {S, Nv, Nij, T} @assert size(array, 2) == size(array, 3) == Nij @assert size(array, 4) == typesize(T, S) @@ -1196,6 +1236,8 @@ struct VIFH{S, Nv, Ni, 
A} <: Data1DX{S, Nv, Ni} array::A end +parent_array_type(::Type{VIFH{S, Nv, Ni, A}}) where {S, Nv, Ni, A} = A + function VIFH{S, Nv, Ni}(array::AbstractArray{T, 4}) where {S, Nv, Ni, T} @assert size(array, 2) == Ni @assert size(array, 3) == typesize(T, S) @@ -1346,16 +1388,18 @@ end # Special DataLayouts for regular gridding # ========================================= -struct IH1JH2{S, Nij, A} <: Data2D{S, Nij} - array::A -end - """ IH1JH2{S, Nij}(data::AbstractMatrix{S}) Stores a 2D field in a matrix using a column-major format. The primary use is for interpolation to a regular grid for ex. plotting / field output. """ +struct IH1JH2{S, Nij, A} <: Data2D{S, Nij} + array::A +end + +parent_array_type(::Type{IH1JH2{S, Nij, A}}) where {S, Nij, A} = A + function IH1JH2{S, Nij}(array::AbstractMatrix{S}) where {S, Nij} @assert size(array, 1) % Nij == 0 @assert size(array, 2) % Nij == 0 @@ -1393,16 +1437,18 @@ end return dataview end -struct IV1JH2{S, n1, Ni, A} <: Data1DX{S, n1, Ni} - array::A -end - """ IV1JH2{S, n1, Ni}(data::AbstractMatrix{S}) Stores values from an extruded 1D spectral field in a matrix using a column-major format. The primary use is for interpolation to a regular grid for ex. plotting / field output. """ +struct IV1JH2{S, n1, Ni, A} <: Data1DX{S, n1, Ni} + array::A +end + +parent_array_type(::Type{IV1JH2{S, n1, Ni, A}}) where {S, n1, Ni, A} = A + function IV1JH2{S, n1, Ni}(array::AbstractMatrix{S}) where {S, n1, Ni} @assert size(array, 2) % Ni == 0 IV1JH2{S, n1, Ni, typeof(array)}(array) @@ -1475,12 +1521,6 @@ Adapt.adapt_structure(to, data::VF{S, Nv}) where {S, Nv} = Adapt.adapt_structure(to, data::DataF{S}) where {S} = DataF{S}(Adapt.adapt(to, parent(data))) -# TODO: Should the DataLayout be device-aware? So that we can -# determine if we're multi-threaded or not? 
-# This is only currently used in FusedMultiBroadcast kernels -device_from_array_type(::Type{<:AbstractArray}) = ClimaComms.CPUSingleThreaded() -ClimaComms.device(data::AbstractData) = - device_from_array_type(typeof(parent(data))) empty_kernel_stats(::ClimaComms.AbstractDevice) = nothing empty_kernel_stats() = empty_kernel_stats(ClimaComms.device()) @@ -1496,6 +1536,9 @@ get_Nij(::IFH{S, Nij}) where {S, Nij} = Nij get_Nij(::IJF{S, Nij}) where {S, Nij} = Nij get_Nij(::IF{S, Nij}) where {S, Nij} = Nij +Base.ndims(data::AbstractData) = Base.ndims(typeof(data)) +Base.ndims(::Type{T}) where {T <: AbstractData} = + Base.ndims(parent_array_type(T)) """ data2array(::AbstractData) diff --git a/src/DataLayouts/broadcast.jl b/src/DataLayouts/broadcast.jl index db822a8b2e..af3d30779c 100644 --- a/src/DataLayouts/broadcast.jl +++ b/src/DataLayouts/broadcast.jl @@ -678,13 +678,12 @@ function Base.copyto!( end, ) # check_fused_broadcast_axes(fmbc) # we should already have checked the axes - fused_copyto!(fmb_inst, dest1, ClimaComms.device(dest1)) + fused_copyto!(fmb_inst, dest1) end function fused_copyto!( fmbc::FusedMultiBroadcast, dest1::VIJFH{S1, Nv1, Nij}, - ::ClimaComms.AbstractCPUDevice, ) where {S1, Nv1, Nij} _, _, _, _, Nh = size(dest1) for (dest, bc) in fmbc.pairs @@ -700,9 +699,8 @@ end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::IJFH{S, Nij, A}, - ::ClimaComms.AbstractCPUDevice, -) where {S, Nij, A} + dest1::IJFH{S, Nij}, +) where {S, Nij} # copy contiguous columns _, _, _, Nv, Nh = size(dest1) for (dest, bc) in fmbc.pairs @@ -717,9 +715,8 @@ end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::VIFH{S, Nv1, Ni, A}, - ::ClimaComms.AbstractCPUDevice, -) where {S, Nv1, Ni, A} + dest1::VIFH{S, Nv1, Ni}, +) where {S, Nv1, Ni} # copy contiguous columns _, _, _, _, Nh = size(dest1) for (dest, bc) in fmbc.pairs @@ -734,9 +731,8 @@ end function fused_copyto!( fmbc::FusedMultiBroadcast, - dest1::VF{S1, Nv1, A}, - ::ClimaComms.AbstractCPUDevice, -) 
where {S1, Nv1, A} + dest1::VF{S1, Nv1}, +) where {S1, Nv1} for (dest, bc) in fmbc.pairs @inbounds for v in 1:Nv1 I = CartesianIndex(1, 1, 1, v, 1) @@ -747,11 +743,7 @@ function fused_copyto!( return nothing end -function fused_copyto!( - fmbc::FusedMultiBroadcast, - dest::DataF{S}, - ::ClimaComms.AbstractCPUDevice, -) where {S} +function fused_copyto!(fmbc::FusedMultiBroadcast, dest::DataF{S}) where {S} for (dest, bc) in fmbc.pairs @inbounds dest[] = convert(S, bc[]) end diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index 5381e32020..c8c0ab7baf 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -143,6 +143,7 @@ const ExtrudedCubedSphereSpectralElementField3D{V, S} = Field{ } Base.propertynames(field::Field) = propertynames(getfield(field, :values)) +Base.ndims(::Type{Field{V, S}}) where {V, S} = Base.ndims(V) @inline field_values(field::Field) = getfield(field, :values) # Define the axes field to be the todata(bc) of the return field diff --git a/test/DataLayouts/benchmark_fill.jl b/test/DataLayouts/benchmark_fill.jl new file mode 100644 index 0000000000..a97007ffff --- /dev/null +++ b/test/DataLayouts/benchmark_fill.jl @@ -0,0 +1,40 @@ +#= +julia --project +using Revise; include(joinpath("test", "DataLayouts", "benchmark_fill.jl")) +=# +using Test +using ClimaCore.DataLayouts +using BenchmarkTools +import ClimaComms +@static pkgversion(ClimaComms) >= v"0.6" && ClimaComms.@import_required_backends + +function benchmarkfill!(device, data, val) + trial = @benchmark ClimaComms.@cuda_sync $device fill!($data, $val) + show(stdout, MIME("text/plain"), trial) + println() +end + +@testset "fill! with Nf = 1" begin + device = ClimaComms.device() + device_zeros(args...) = ClimaComms.array_type(device)(zeros(args...)) + FT = Float64 + S = FT + Nf = 1 + Nv = 63 + Nij = 4 + Nh = 30 + Nk = 6 +#! 
format: off + data = DataF{S}(device_zeros(FT,Nf)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = IJFH{S, Nij}(device_zeros(FT,Nij,Nij,Nf,Nh)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = IFH{S, Nij}(device_zeros(FT,Nij,Nf,Nh)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = IJF{S, Nij}(device_zeros(FT,Nij,Nij,Nf)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = IF{S, Nij}(device_zeros(FT,Nij,Nf)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = VF{S, Nv}(device_zeros(FT,Nv,Nf)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = VIJFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nij,Nf,Nh)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) + data = VIFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nf,Nh)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) +#! format: on + + # data = IJKFVH{S}(device_zeros(FT,Nij,Nij,Nk,Nf,Nh)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) # TODO: test + # data = IH1JH2{S}(device_zeros(FT,Nij,Nij,Nk,Nf,Nh)); benchmarkfill!(device, data, 3); @test all(parent(data) .== 3) # TODO: test +end diff --git a/test/DataLayouts/unit_fill.jl b/test/DataLayouts/unit_fill.jl new file mode 100644 index 0000000000..4cf3e4c59c --- /dev/null +++ b/test/DataLayouts/unit_fill.jl @@ -0,0 +1,57 @@ +#= +julia --project +using Revise; include(joinpath("test", "DataLayouts", "fill.jl")) +=# +using Test +using ClimaCore.DataLayouts +import ClimaComms +ClimaComms.@import_required_backends + +@testset "fill! with Nf = 1" begin + device = ClimaComms.device() + device_zeros(args...) = ClimaComms.array_type(device)(zeros(args...)) + FT = Float64 + S = FT + Nf = 1 + Nv = 4 + Nij = 3 + Nh = 5 + Nk = 6 +#! 
format: off + data = DataF{S}(device_zeros(FT,Nf)); fill!(data, 3); @test all(parent(data) .== 3) + data = IJFH{S, Nij}(device_zeros(FT,Nij,Nij,Nf,Nh)); fill!(data, 3); @test all(parent(data) .== 3) + data = IFH{S, Nij}(device_zeros(FT,Nij,Nf,Nh)); fill!(data, 3); @test all(parent(data) .== 3) + data = IJF{S, Nij}(device_zeros(FT,Nij,Nij,Nf)); fill!(data, 3); @test all(parent(data) .== 3) + data = IF{S, Nij}(device_zeros(FT,Nij,Nf)); fill!(data, 3); @test all(parent(data) .== 3) + data = VF{S, Nv}(device_zeros(FT,Nv,Nf)); fill!(data, 3); @test all(parent(data) .== 3) + data = VIJFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nij,Nf,Nh)); fill!(data, 3); @test all(parent(data) .== 3) + data = VIFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nf,Nh)); fill!(data, 3); @test all(parent(data) .== 3) +#! format: on + # data = DataLayouts.IJKFVH{S, Nij, Nk}(device_zeros(FT,Nij,Nij,Nk,Nf,Nv,Nh)); fill!(data, (2,3)); @test all(parent(data) .== 3) # TODO: test + # data = DataLayouts.IH1JH2{S, Nij}(device_zeros(FT,2*Nij,3*Nij)); fill!(data, (2,3)); @test all(parent(data) .== 3) # TODO: test +end + +@testset "fill! with Nf > 1" begin + device = ClimaComms.device() + device_zeros(args...) = ClimaComms.array_type(device)(zeros(args...)) + FT = Float64 + S = Tuple{FT, FT} + Nf = 2 + Nv = 4 + Nij = 3 + Nh = 5 + Nk = 6 +#! 
format: off + data = DataF{S}(device_zeros(FT,Nf)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = IJFH{S, Nij}(device_zeros(FT,Nij,Nij,Nf,Nh)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = IFH{S, Nij}(device_zeros(FT,Nij,Nf,Nh)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = IJF{S, Nij}(device_zeros(FT,Nij,Nij,Nf)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = IF{S, Nij}(device_zeros(FT,Nij,Nf)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = VF{S, Nv}(device_zeros(FT,Nv,Nf)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = VIJFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nij,Nf,Nh)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) + data = VIFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nf,Nh)); fill!(data, (2,3)); @test all(parent(data.:1) .== 2); @test all(parent(data.:2) .== 3) +#! format: on + # TODO: test this + # data = DataLayouts.IJKFVH{S, Nij, Nk}(device_zeros(FT,Nij,Nij,Nk,Nf,Nv,Nh)); fill!(data, (2,3)); @test all(parent(data) .== (2,3)) # TODO: test + # data = DataLayouts.IH1JH2{S, Nij}(device_zeros(FT,2*Nij,3*Nij)); fill!(data, (2,3)); @test all(parent(data) .== (2,3)) # TODO: test +end diff --git a/test/DataLayouts/unit_ndims.jl b/test/DataLayouts/unit_ndims.jl new file mode 100644 index 0000000000..15c068d74a --- /dev/null +++ b/test/DataLayouts/unit_ndims.jl @@ -0,0 +1,32 @@ +#= +julia --project +using Revise; include(joinpath("test", "DataLayouts", "ndims.jl")) +=# +using Test +using ClimaCore.DataLayouts +import ClimaComms +ClimaComms.@import_required_backends + +@testset "Base.ndims" begin + device = ClimaComms.device() + device_zeros(args...) 
= ClimaComms.array_type(device)(zeros(args...)) + FT = Float64 + S = FT + Nf = 1 + Nv = 4 + Nij = 3 + Nh = 5 + Nk = 6 +#! format: off + data = DataF{S}(device_zeros(FT,Nf)); @test ndims(data) == 1; @test ndims(typeof(data)) == 1 + data = IF{S, Nij}(device_zeros(FT,Nij,Nf)); @test ndims(data) == 2; @test ndims(typeof(data)) == 2 + data = VF{S, Nv}(device_zeros(FT,Nv,Nf)); @test ndims(data) == 2; @test ndims(typeof(data)) == 2 + data = IFH{S, Nij}(device_zeros(FT,Nij,Nf,Nh)); @test ndims(data) == 3; @test ndims(typeof(data)) == 3 + data = IJF{S, Nij}(device_zeros(FT,Nij,Nij,Nf)); @test ndims(data) == 3; @test ndims(typeof(data)) == 3 + data = IJFH{S, Nij}(device_zeros(FT,Nij,Nij,Nf,Nh)); @test ndims(data) == 4; @test ndims(typeof(data)) == 4 + data = VIFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nf,Nh)); @test ndims(data) == 4; @test ndims(typeof(data)) == 4 + data = VIJFH{S, Nv, Nij}(device_zeros(FT,Nv,Nij,Nij,Nf,Nh)); @test ndims(data) == 5; @test ndims(typeof(data)) == 5 + data = DataLayouts.IJKFVH{S, Nij, Nk}(device_zeros(FT,Nij,Nij,Nk,Nf,Nv,Nh)); @test ndims(data) == 6; @test ndims(typeof(data)) == 6 + data = DataLayouts.IH1JH2{S, Nij}(device_zeros(FT,2*Nij,3*Nij)); @test ndims(data) == 2; @test ndims(typeof(data)) == 2 +#! 
format: on +end diff --git a/test/Fields/field_multi_broadcast_fusion.jl b/test/Fields/field_multi_broadcast_fusion.jl index 5d6b128720..9955a507e1 100644 --- a/test/Fields/field_multi_broadcast_fusion.jl +++ b/test/Fields/field_multi_broadcast_fusion.jl @@ -72,9 +72,9 @@ function CenterExtrudedFiniteDifferenceSpaceLineHSpace( return Spaces.ExtrudedFiniteDifferenceSpace(hspace, vspace) end -function benchmark_kernel!(f!, X, Y) +function benchmark_kernel!(f!, X, Y, device) println("\n--------------------------- $(nameof(typeof(f!))) ") - trial = benchmark_kernel!(f!, X, Y, ClimaComms.device(X.x1)) + trial = benchmark_kernel!(f!, X, Y, device) show(stdout, MIME("text/plain"), trial) end benchmark_kernel!(f!, X, Y, ::ClimaComms.CUDADevice) = @@ -250,11 +250,12 @@ end @testset "FusedMultiBroadcast VIJFH and VF" begin FT = Float64 + device = ClimaComms.device() space = TU.CenterExtrudedFiniteDifferenceSpace( FT; zelem = 3, helem = 4, - context = ClimaComms.context(), + context = ClimaComms.context(device), ) X = Fields.FieldVector( x1 = rand_field(FT, space), @@ -269,11 +270,11 @@ end test_kernel!(; fused!, unfused!, X, Y) test_kernel!(; fused! = fused_bycolumn!, unfused! 
= unfused_bycolumn!, X, Y) - benchmark_kernel!(unfused!, X, Y) - benchmark_kernel!(fused!, X, Y) + benchmark_kernel!(unfused!, X, Y, device) + benchmark_kernel!(fused!, X, Y, device) - benchmark_kernel!(unfused_bycolumn!, X, Y) - benchmark_kernel!(fused_bycolumn!, X, Y) + benchmark_kernel!(unfused_bycolumn!, X, Y, device) + benchmark_kernel!(fused_bycolumn!, X, Y, device) nothing end @@ -306,11 +307,11 @@ end Y, ) - benchmark_kernel!(unfused!, X, Y) - benchmark_kernel!(fused!, X, Y) + benchmark_kernel!(unfused!, X, Y, device) + benchmark_kernel!(fused!, X, Y, device) - benchmark_kernel!(unfused_bycolumn!, X, Y) - benchmark_kernel!(fused_bycolumn!, X, Y) + benchmark_kernel!(unfused_bycolumn!, X, Y, device) + benchmark_kernel!(fused_bycolumn!, X, Y, device) nothing end end @@ -332,8 +333,8 @@ end y3 = IJFH_data(), ) test_kernel!(; fused!, unfused!, X, Y) - benchmark_kernel!(unfused!, X, Y) - benchmark_kernel!(fused!, X, Y) + benchmark_kernel!(unfused!, X, Y, device) + benchmark_kernel!(fused!, X, Y, device) nothing end @@ -350,8 +351,8 @@ end X = Fields.FieldVector(; x1 = VF_data(), x2 = VF_data(), x3 = VF_data()) Y = Fields.FieldVector(; y1 = VF_data(), y2 = VF_data(), y3 = VF_data()) test_kernel!(; fused!, unfused!, X, Y) - benchmark_kernel!(unfused!, X, Y) - benchmark_kernel!(fused!, X, Y) + benchmark_kernel!(unfused!, X, Y, device) + benchmark_kernel!(fused!, X, Y, device) nothing end @@ -359,7 +360,7 @@ end FT = Float64 device = ClimaComms.device() ArrayType = ClimaComms.array_type(device) - DataF_data() = DataF{FT}(ArrayType(ones(FT, 2))) + DataF_data() = DataF{FT}(ArrayType(ones(FT, 1))) X = Fields.FieldVector(; x1 = DataF_data(), x2 = DataF_data(), @@ -371,7 +372,7 @@ end y3 = DataF_data(), ) test_kernel!(; fused!, unfused!, X, Y) - benchmark_kernel!(unfused!, X, Y) - benchmark_kernel!(fused!, X, Y) + benchmark_kernel!(unfused!, X, Y, device) + benchmark_kernel!(fused!, X, Y, device) nothing end diff --git a/test/runtests.jl b/test/runtests.jl index 
254656a8df..a736cbc737 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,8 @@ ERROR: Package ClimaCore errored during testing (exit code: 541541187) Stacktrace: [1] pkgerror(msg::String) =# +@safetestset "DataLayouts fill" begin @time include("DataLayouts/unit_fill.jl") end +@safetestset "DataLayouts ndims" begin @time include("DataLayouts/unit_ndims.jl") end if !Sys.iswindows() #= @safetestset "Recursive" begin @time include("RecursiveApply/recursive_apply.jl") end