From 8bddfaeb62af0b65a40532c93722f11e552ffed2 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Mon, 8 Jul 2024 11:45:56 -0400 Subject: [PATCH 1/3] Add LazyBroadcast to test env, refactor MatrixField BC tests --- Project.toml | 4 +- test/MatrixFields/matrix_field_test_utils.jl | 94 +++++++++++++++++++ .../test_scalar_1.jl | 36 +++++-- test/runtests.jl | 3 +- 4 files changed, 126 insertions(+), 11 deletions(-) diff --git a/Project.toml b/Project.toml index a66af86916..7217543877 100644 --- a/Project.toml +++ b/Project.toml @@ -64,6 +64,7 @@ JET = "0.9" Krylov = "0.9" KrylovKit = "0.6, 0.7, 0.8" LinearAlgebra = "1" +LazyBroadcast = "0.1" Logging = "1" MPI = "0.20" MultiBroadcastFusion = "0.3" @@ -95,6 +96,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FastBroadcast = "7034ab61-46d4-4ed7-9d0f-46aef9175898" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" +LazyBroadcast = "9dccce8e-a116-406d-9fcc-a88ed4f510c8" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -106,5 +108,5 @@ TerminalLoggers = "5d786b92-1e48-4d6f-9151-6b4477ca9bed" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "ArgParse", "AssociatedLegendrePolynomials", "BenchmarkTools", "Combinatorics", "CountFlops", "Dates", "FastBroadcast", "Krylov", "JET", "Logging", "MPI", "OrderedCollections", "PrettyTables", "Random", "SafeTestsets", "StatsBase", "TerminalLoggers", "Test"] +test = ["Aqua", "ArgParse", "AssociatedLegendrePolynomials", "BenchmarkTools", "Combinatorics", "CountFlops", "Dates", "FastBroadcast", "Krylov", "JET", "LazyBroadcast", "Logging", "MPI", "OrderedCollections", "PrettyTables", "Random", "SafeTestsets", "StatsBase", "TerminalLoggers", "Test"] diff --git a/test/MatrixFields/matrix_field_test_utils.jl b/test/MatrixFields/matrix_field_test_utils.jl index b03f9e98e6..7ba97d9833 100644 --- a/test/MatrixFields/matrix_field_test_utils.jl +++ b/test/MatrixFields/matrix_field_test_utils.jl @@ -3,6 +3,7 @@ using JET import Random: seed! import ClimaComms +import BenchmarkTools as BT ClimaComms.@import_required_backends import ClimaCore: Geometry, @@ -253,3 +254,96 @@ const NestedType{FT} = NamedTuple{ Tuple{FT, NamedTuple{(:d,), Tuple{Tuple{FT}}}, NamedTuple{(), Tuple{}}}, }, } + +import Base.Broadcast: materialize, materialize! +import LazyBroadcast: @lazy +import BenchmarkTools as BT + +function call_ref_set_result!( + ref_result_arrays, + inputs_arrays, + temp_values_arrays, +) + for arrays in + zip(ref_result_arrays, inputs_arrays..., temp_values_arrays...) + mul!(arrays...) + end +end + +function print_time_comparison(; time, ref_time) + time_rounded = round(time; sigdigits = 2) + ref_time_rounded = round(ref_time; sigdigits = 2) + time_ratio = time / ref_time + time_ratio_rounded = round(time_ratio; sigdigits = 2) + @info "Times (ClimaCore,Array,ClimaCore/Array): = ($time_rounded, $ref_time_rounded, $time_ratio_rounded)." + return nothing +end + +function compute_max_error(result_arrays, ref_result_arrays) + return maximum(zip(result_arrays, ref_result_arrays)) do (array, ref_array) + maximum(eachindex(array, ref_array)) do ξ + abs(array[ξ] - ref_array[ξ]) + end + end +end + +function unit_test_field_broadcast_vs_array_reference( + result, + bc, + inputs_arrays; + temp_values_arrays = (), + using_cuda, + allowed_max_eps_error = 0, +) + result_arrays = MatrixFields.field2arrays(result) + ref_result_arrays = MatrixFields.field2arrays(similar(result)) + result = materialize(bc) + result₀ = copy(result) + set_result!(result, bc) + @test result == result₀ + call_ref_set_result!(ref_result_arrays, inputs_arrays, temp_values_arrays) + max_error = compute_max_error(result_arrays, ref_result_arrays) + max_eps_error = ceil(Int, max_error / eps(typeof(max_error))) + @test max_eps_error == allowed_max_eps_error + return nothing +end + +function opt_test_field_broadcast_against_array_reference( + result, + bc, + inputs_arrays; + temp_values_arrays = (), + using_cuda, +) + ref_result_arrays = MatrixFields.field2arrays(similar(result)) + ref_time = BT.@belapsed call_ref_set_result!( + $ref_result_arrays, + $inputs_arrays, + $temp_values_arrays, + ) + time = BT.@belapsed set_result!($result, $bc) + print_time_comparison(; time, ref_time) + + # Test get_result and set_result! for type instabilities, and test + # set_result! for allocations. Ignore the type instabilities in CUDA and + # the allocations they incur. + @test_opt ignored_modules = cuda_frames materialize(bc) + @test_opt ignored_modules = cuda_frames set_result!(result, bc) + using_cuda || @test (@allocated set_result!(result, bc)) == 0 + + # Test ref_set_result! for type instabilities and allocations to ensure + # that the performance comparison is fair. + @test_opt ignored_modules = cuda_frames call_ref_set_result!( + ref_result_arrays, + inputs_arrays, + temp_values_arrays, + ) + using_cuda || @test (@allocated call_ref_set_result!( + ref_result_arrays, + inputs_arrays, + temp_values_arrays, + )) == 0 + return nothing +end + +set_result!(result, bc) = (materialize!(result, bc); nothing) diff --git a/test/MatrixFields/matrix_fields_broadcasting/test_scalar_1.jl b/test/MatrixFields/matrix_fields_broadcasting/test_scalar_1.jl index 086b098c18..c1245f5715 100644 --- a/test/MatrixFields/matrix_fields_broadcasting/test_scalar_1.jl +++ b/test/MatrixFields/matrix_fields_broadcasting/test_scalar_1.jl @@ -1,11 +1,29 @@ -if !(@isdefined(test_field_broadcast_against_array_reference)) - include("test_scalar_utils.jl") +#= +julia --project +using Revise; include(joinpath("test", "MatrixFields", "matrix_fields_broadcasting", "test_scalar_1.jl")) +=# +import ClimaCore +#! format: off +if !(@isdefined(unit_test_field_broadcast_vs_array_reference)) + include(joinpath(pkgdir(ClimaCore),"test","MatrixFields","matrix_fields_broadcasting","test_scalar_utils.jl")) end +#! format: on +test_opt = get(ENV, "BUILDKITE", "") == "true" +@testset "diagonal matrix times vector" begin + bc = @lazy @. ᶜᶜmat ⋅ ᶜvec + result = materialize(bc) -test_field_broadcast_against_array_reference(; - test_name = "diagonal matrix times vector", - get_result = () -> (@. ᶜᶜmat ⋅ ᶜvec), - set_result! = result -> (@. result = ᶜᶜmat ⋅ ᶜvec), - input_fields = (ᶜᶜmat, ᶜvec), - ref_set_result! = (_result, _ᶜᶜmat, _ᶜvec) -> mul!(_result, _ᶜᶜmat, _ᶜvec), -) + inputs_arrays = map(MatrixFields.field2arrays, (ᶜᶜmat, ᶜvec)) + unit_test_field_broadcast_vs_array_reference( + result, + bc, + inputs_arrays; + using_cuda, + ) + test_opt && opt_test_field_broadcast_against_array_reference( + result, + bc, + inputs_arrays; + using_cuda, + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index 3fc9f74c7f..53181fab02 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,7 +16,7 @@ UnitTest("DataLayouts 1D" ,"DataLayouts/data1d.jl"), UnitTest("DataLayouts 2D" ,"DataLayouts/data2d.jl"), UnitTest("DataLayouts 1dx" ,"DataLayouts/data1dx.jl"), UnitTest("DataLayouts 2dx" ,"DataLayouts/data2dx.jl"), -UnitTest("DataLayouts mapreduce" ,"DataLayouts/unit_mapreduce.jl"), +UnitTest("DataLayouts mapreduce" ,"DataLayouts/unit_mapreduce.jl"), UnitTest("Geometry" ,"Geometry/geometry.jl"), UnitTest("rmul_with_projection" ,"Geometry/rmul_with_projection.jl"), UnitTest("AxisTensors" ,"Geometry/axistensors.jl"), @@ -60,6 +60,7 @@ UnitTest("MatrixFields - BandMatrixRow" ,"MatrixFields/band_matrix_ro UnitTest("MatrixFields - field2arrays" ,"MatrixFields/field2arrays.jl"), UnitTest("MatrixFields - mat mul at boundaries" ,"MatrixFields/matrix_multiplication_at_boundaries.jl"), UnitTest("MatrixFields - field names" ,"MatrixFields/field_names.jl"), +UnitTest("MatrixFields - broadcasting (1)" ,"MatrixFields/matrix_fields_broadcasting/test_scalar_1.jl"), # UnitTest("MatrixFields - matrix field broadcast" ,"MatrixFields/matrix_field_broadcasting.jl"), # too long # UnitTest("MatrixFields - operator matrices" ,"MatrixFields/operator_matrices.jl"), # too long # UnitTest("MatrixFields - field matrix solvers" ,"MatrixFields/field_matrix_solvers.jl"), # too long From 8f6157092fbaa1174fe25a734fc332c57c9554e1 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Mon, 8 Jul 2024 12:34:02 -0400 Subject: [PATCH 2/3] Add LazyBroadcast to perf, update manifests --- .buildkite/Manifest.toml | 9 +++++++-- .buildkite/Project.toml | 1 + perf/Project.toml | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.buildkite/Manifest.toml b/.buildkite/Manifest.toml index 8e6f623509..793f060a65 100644 --- a/.buildkite/Manifest.toml +++ b/.buildkite/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.4" manifest_format = "2.0" -project_hash = "353722d366e629f6f6c9f924f5abc8f8725c735e" +project_hash = "afb10d666d598a10e6e655a26664b15c4735aa72" [[deps.ADTypes]] git-tree-sha1 = "fa0822e5baee6e23081c2685ae27265dabee23d8" @@ -305,7 +305,7 @@ weakdeps = ["CUDA", "MPI"] ClimaCommsMPIExt = "MPI" [[deps.ClimaCore]] -deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "Static", "StaticArrays", "Statistics", "Unrolled"] +deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "DocStringExtensions", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "StaticArrays", "Statistics", "Unrolled"] path = ".." uuid = "d414da3d-4745-48bb-8d80-42e94e092884" version = "0.14.9" @@ -1232,6 +1232,11 @@ weakdeps = ["StaticArrays"] deps = ["Artifacts", "Pkg"] uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +[[deps.LazyBroadcast]] +git-tree-sha1 = "cc1c30ef453d2867048d747a35db21888d0e3f76" +uuid = "9dccce8e-a116-406d-9fcc-a88ed4f510c8" +version = "0.1.3" + [[deps.LeftChildRightSiblingTrees]] deps = ["AbstractTrees"] git-tree-sha1 = "fb6803dafae4a5d62ea5cab204b1e657d9737e7f" diff --git a/.buildkite/Project.toml b/.buildkite/Project.toml index 27a3b6122d..411b2fd456 100644 --- a/.buildkite/Project.toml +++ b/.buildkite/Project.toml @@ -27,6 +27,7 @@ IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LazyBroadcast = "9dccce8e-a116-406d-9fcc-a88ed4f510c8" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" diff --git a/perf/Project.toml b/perf/Project.toml index 5ad9661a9a..a2a1026fb6 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -15,6 +15,7 @@ GilbertCurves = "88fa7841-ef32-4516-bb70-c6ec135699d9" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LazyBroadcast = "9dccce8e-a116-406d-9fcc-a88ed4f510c8" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" From 72faba39ddc3aaf80474ceb92417203d4ebfd5dc Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Mon, 8 Jul 2024 12:55:18 -0400 Subject: [PATCH 3/3] Bump inference failure limit --- test/Spaces/opt_spaces.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Spaces/opt_spaces.jl b/test/Spaces/opt_spaces.jl index 4ed769bcfd..7eb3d4541a 100644 --- a/test/Spaces/opt_spaces.jl +++ b/test/Spaces/opt_spaces.jl @@ -38,7 +38,7 @@ end test_n_failures(1120, TU.SpectralElementSpace2D, context) test_n_failures(123, TU.ColumnCenterFiniteDifferenceSpace, context) test_n_failures(123, TU.ColumnFaceFiniteDifferenceSpace, context) - test_n_failures(1125, TU.SphereSpectralElementSpace, context) + test_n_failures(1126, TU.SphereSpectralElementSpace, context) test_n_failures(1139, TU.CenterExtrudedFiniteDifferenceSpace, context) test_n_failures(1139, TU.FaceExtrudedFiniteDifferenceSpace, context) else