From 413f40b46a3aed132cf828983e297c790ca1b25b Mon Sep 17 00:00:00 2001 From: yonatanwesen Date: Fri, 27 Oct 2023 17:34:52 -0400 Subject: [PATCH 1/5] don't allocate for inexact jacobian. just modifiy J --- src/pseudotransient.jl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 64e4f258c..cd4c2931a 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -114,12 +114,17 @@ end function perform_step!(cache::PseudoTransientCache{true}) @unpack u, u_prev, fu1, f, p, alg, J, linsolve, du, alpha, tc_storage = cache jacobian!!(J, cache) - J_new = J - (1 / alpha) * I + if J isa SciMLBase.AbstractSciMLOperator + J = J - (1 / alpha) * I + else + J .= J - (1 / alpha) * I + end + #J_new = J - (1 / alpha) * I termination_condition = cache.termination_condition(tc_storage) # u = u - J \ fu - linres = dolinsolve(alg.precs, linsolve; A = J_new, b = _vec(fu1), linu = _vec(du), + linres = dolinsolve(alg.precs, linsolve; A = J, b = _vec(fu1), linu = _vec(du), p, reltol = cache.abstol) cache.linsolve = linres.cache @. u = u - du @@ -147,11 +152,13 @@ function perform_step!(cache::PseudoTransientCache{false}) termination_condition = cache.termination_condition(tc_storage) cache.J = jacobian!!(cache.J, cache) + + cache.J = cache.J - (1 / alpha) * I # u = u - J \ fu if linsolve === nothing - cache.du = fu1 / (cache.J - (1 / alpha) * I) + cache.du = fu1 / (cache.J) else - linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I, + linres = dolinsolve(alg.precs, linsolve; A = cache.J, b = _vec(fu1), linu = _vec(cache.du), p, reltol = cache.abstol) cache.linsolve = linres.cache From 7efded5c71794dabeb7c7efa4716ebfee8f5893e Mon Sep 17 00:00:00 2001 From: yonatanwesen Date: Fri, 27 Oct 2023 18:33:51 -0400 Subject: [PATCH 2/5] address comments --- src/pseudotransient.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index cd4c2931a..1784f1465 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -119,7 +119,6 @@ function perform_step!(cache::PseudoTransientCache{true}) else J .= J - (1 / alpha) * I end - #J_new = J - (1 / alpha) * I termination_condition = cache.termination_condition(tc_storage) @@ -156,7 +155,7 @@ function perform_step!(cache::PseudoTransientCache{false}) cache.J = cache.J - (1 / alpha) * I # u = u - J \ fu if linsolve === nothing - cache.du = fu1 / (cache.J) + cache.du = fu1 / cache.J else linres = dolinsolve(alg.precs, linsolve; A = cache.J, b = _vec(fu1), From 467009c17a4be48550c292673fb9fba5c1f8df90 Mon Sep 17 00:00:00 2001 From: yonatanwesen Date: Sat, 28 Oct 2023 14:58:56 -0400 Subject: [PATCH 3/5] actually avoid making allocations for inexact jacobian --- Project.toml | 6 +++--- src/NonlinearSolve.jl | 3 ++- src/pseudotransient.jl | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index 3080b7178..5b60bafcf 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +FastBroadcast = "7034ab61-46d4-4ed7-9d0f-46aef9175898" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" @@ -35,9 +36,8 @@ NonlinearSolveFastLevenbergMarquardtExt = "FastLevenbergMarquardt" NonlinearSolveLeastSquaresOptimExt = "LeastSquaresOptim" [compat] -BandedMatrices = "1" ADTypes = "0.2" -ArrayInterface = "6.0.24, 7" +BandedMatrices = "1" ConcreteStructs = "0.2" DiffEqBase = "6.130" EnumX = "1" @@ -63,6 +63,7 @@ julia = "1.9" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FastLevenbergMarquardt = "7a0df574-e128-4d35-8cbd-3d84502bf7ce" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" @@ -79,7 +80,6 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" -DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" [targets] test = ["Enzyme", "BenchmarkTools", "SafeTestsets", "Pkg", "Test", "ForwardDiff", "StaticArrays", "Symbolics", "LinearSolve", "Random", "LinearAlgebra", "Zygote", "SparseDiffTools", "NonlinearProblemLibrary", "LeastSquaresOptim", "FastLevenbergMarquardt", "NaNMath", "BandedMatrices", "DiffEqBase"] diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 1b18666e6..16b7e4a82 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -9,11 +9,12 @@ import PrecompileTools PrecompileTools.@recompile_invalidations begin using DiffEqBase, LinearAlgebra, LinearSolve, SparseArrays, SparseDiffTools + using FastBroadcast: @.., True, False import ArrayInterface: restructure import ADTypes: AbstractFiniteDifferencesMode import ArrayInterface: undefmatrix, - matrix_colors, parameterless_type, ismutable, issingular + matrix_colors, parameterless_type, ismutable, issingular,fast_scalar_indexing import ConcreteStructs: @concrete import EnumX: @enumx import ForwardDiff diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 1784f1465..4c8ee40e7 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -114,10 +114,19 @@ end function perform_step!(cache::PseudoTransientCache{true}) @unpack u, u_prev, fu1, f, p, alg, J, linsolve, du, alpha, tc_storage = cache jacobian!!(J, cache) + inv_alpha = inv(alpha) + if J isa SciMLBase.AbstractSciMLOperator - J = J - (1 / alpha) * I + J = J - inv_alpha * I else - J .= J - (1 / alpha) * I + idxs = diagind(J) + if fast_scalar_indexing(J) + @inbounds for i in axes(J, 1) + J[i, i] = J[i, i] - inv_alpha + end + else + @.. broadcast=false @view(J[idxs])=@view(J[idxs]) - inv_alpha + end end termination_condition = cache.termination_condition(tc_storage) @@ -151,8 +160,9 @@ function perform_step!(cache::PseudoTransientCache{false}) termination_condition = cache.termination_condition(tc_storage) cache.J = jacobian!!(cache.J, cache) + inv_alpha = inv(alpha) - cache.J = cache.J - (1 / alpha) * I + cache.J = cache.J - inv_alpha * I # u = u - J \ fu if linsolve === nothing cache.du = fu1 / cache.J From 7390f71610d8cd838fd829ef8e7e393c8702ba78 Mon Sep 17 00:00:00 2001 From: yonatanwesen Date: Tue, 31 Oct 2023 15:28:09 -0400 Subject: [PATCH 4/5] fix project.toml? --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 5b60bafcf..35159532c 100644 --- a/Project.toml +++ b/Project.toml @@ -37,6 +37,7 @@ NonlinearSolveLeastSquaresOptimExt = "LeastSquaresOptim" [compat] ADTypes = "0.2" +ArrayInterface = "6.0.24, 7" BandedMatrices = "1" ConcreteStructs = "0.2" DiffEqBase = "6.130" From abaf747734434ac4ccb07f3a4d56f494ac486194 Mon Sep 17 00:00:00 2001 From: yonatanwesen Date: Tue, 31 Oct 2023 16:42:05 -0400 Subject: [PATCH 5/5] address comments and add compat for FastBroadcast --- Project.toml | 1 + src/NonlinearSolve.jl | 2 +- src/pseudotransient.jl | 4 +--- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 35159532c..078c37f3b 100644 --- a/Project.toml +++ b/Project.toml @@ -43,6 +43,7 @@ ConcreteStructs = "0.2" DiffEqBase = "6.130" EnumX = "1" Enzyme = "0.11" +FastBroadcast = "0.1.9, 0.2" FastLevenbergMarquardt = "0.1" FiniteDiff = "2" ForwardDiff = "0.10.3" diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl index 16b7e4a82..0f8626314 100644 --- a/src/NonlinearSolve.jl +++ b/src/NonlinearSolve.jl @@ -9,7 +9,7 @@ import PrecompileTools PrecompileTools.@recompile_invalidations begin using DiffEqBase, LinearAlgebra, LinearSolve, SparseArrays, SparseDiffTools - using FastBroadcast: @.., True, False + using FastBroadcast: @.. import ArrayInterface: restructure import ADTypes: AbstractFiniteDifferencesMode diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl index 4c8ee40e7..ca7cbcdbb 100644 --- a/src/pseudotransient.jl +++ b/src/pseudotransient.jl @@ -167,9 +167,7 @@ function perform_step!(cache::PseudoTransientCache{false}) if linsolve === nothing cache.du = fu1 / cache.J else - linres = dolinsolve(alg.precs, linsolve; A = cache.J, - b = _vec(fu1), - linu = _vec(cache.du), p, reltol = cache.abstol) + linres = dolinsolve(alg.precs, linsolve; A = cache.J,b = _vec(fu1),linu = _vec(cache.du), p, reltol = cache.abstol) cache.linsolve = linres.cache end cache.u = @. u - cache.du # `u` might not support mutation