Merge pull request #254 from SciML/nonvector

Handle non-vector inputs
SciML · Oct 20, 2023 · a6c17cb · a6c17cb
2 parents 2fd9480 + e8778ed
commit a6c17cb
Show file tree

Hide file tree

Showing 8 changed files with 47 additions and 21 deletions.
diff --git a/Project.toml b/Project.toml
@@ -50,7 +50,7 @@ PrecompileTools = "1"
 RecursiveArrayTools = "2"
 Reexport = "0.2, 1"
 SciMLBase = "2.4"
-SimpleNonlinearSolve = "0.1.22"
+SimpleNonlinearSolve = "0.1.23"
 SparseDiffTools = "2.6"
 StaticArraysCore = "1.4"
 UnPack = "1.0"

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
@@ -5,6 +5,7 @@ if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_m
 end
 
 using DiffEqBase, LinearAlgebra, LinearSolve, SparseArrays, SparseDiffTools
+import ArrayInterface: restructure
 import ForwardDiff
 
 import ADTypes: AbstractFiniteDifferencesMode

diff --git a/src/jacobian.jl b/src/jacobian.jl
@@ -92,7 +92,7 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, ::Val{ii
     if needsJᵀJ
         JᵀJ = __init_JᵀJ(J)
         # FIXME: This needs to be handled better for JacVec Operator
-        Jᵀfu = J' * fu
+        Jᵀfu = J' * _vec(fu)
     end
 
     linprob = LinearProblem(needsJᵀJ ? __maybe_symmetric(JᵀJ) : J,

diff --git a/src/levenberg.jl b/src/levenberg.jl
@@ -172,7 +172,7 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip},
     else
         d = similar(u)
         d .= min_damping_D
-        DᵀD = Diagonal(d)
+        DᵀD = Diagonal(_vec(d))
     end
 
     loss = internalnorm(fu1)
@@ -209,21 +209,21 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
 
     # Usual Levenberg-Marquardt step ("velocity").
     # The following lines do: cache.v = -cache.mat_tmp \ cache.u_tmp
-    mul!(cache.u_tmp, J', fu1)
+    mul!(_vec(cache.u_tmp), J', _vec(fu1))
     @. cache.mat_tmp = JᵀJ + λ * DᵀD
     linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp),
         b = _vec(cache.u_tmp), linu = _vec(cache.du), p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache
-    @. cache.v = -cache.du
+    _vec(cache.v) .= -1 .* _vec(cache.du)
 
     # Geodesic acceleration (step_size = v + a / 2).
     @unpack v, α_geodesic, h = cache
-    f(cache.fu_tmp, u .+ h .* v, p)
+    f(cache.fu_tmp, _restructure(u, _vec(u) .+ h .* _vec(v)), p)
 
     # The following lines do: cache.a = -J \ cache.fu_tmp
-    mul!(cache.Jv, J, v)
+    mul!(_vec(cache.Jv), J, _vec(v))
     @. cache.fu_tmp = (2 / h) * ((cache.fu_tmp - fu1) / h - cache.Jv)
-    mul!(cache.u_tmp, J', cache.fu_tmp)
+    mul!(_vec(cache.u_tmp), J', _vec(cache.fu_tmp))
     # NOTE: Don't pass `A` in again, since we want to reuse the previous solve
     linres = dolinsolve(alg.precs, linsolve; b = _vec(cache.u_tmp),
         linu = _vec(cache.du), p = p, reltol = cache.abstol)
@@ -235,7 +235,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)
     if 2 * norm(cache.a) ≤ α_geodesic * norm_v
-        @. cache.δ = v + cache.a / 2
+        _vec(cache.δ) .= _vec(v) .+ _vec(cache.a) ./ 2
         @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
         f(cache.fu_tmp, u .+ δ, p)
         cache.stats.nf += 1
@@ -251,7 +251,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
                 return nothing
             end
             cache.fu1 .= cache.fu_tmp
-            cache.v_old .= v
+            _vec(cache.v_old) .= _vec(v)
             cache.norm_v_old = norm_v
             cache.loss_old = loss
             cache.λ_factor = 1 / cache.damping_decrease_factor
@@ -289,7 +289,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
         cache.v = -cache.mat_tmp \ (J' * fu1)
     else
         linres = dolinsolve(alg.precs, linsolve; A = -__maybe_symmetric(cache.mat_tmp),
-            b = _vec(J' * fu1), linu = _vec(cache.v), p, reltol = cache.abstol)
+            b = _vec(J' * _vec(fu1)), linu = _vec(cache.v), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
 
@@ -300,8 +300,8 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
                   _vec(J' * ((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))
     else
         linres = dolinsolve(alg.precs, linsolve;
-            b = _mutable(_vec(J' *
-                              ((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))),
+            b = _mutable(_vec(J' *  #((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))),
+                         _vec(((2 / h) .* ((_vec(f(u .+ h .* _restructure(u,v), p)) .- _vec(fu1)) ./ h .- J * _vec(v)))))),
             linu = _vec(cache.a), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
@@ -311,7 +311,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
     # Require acceptable steps to satisfy the following condition.
     norm_v = norm(v)
     if 2 * norm(cache.a) ≤ α_geodesic * norm_v
-        cache.δ = v .+ cache.a ./ 2
+        cache.δ = _restructure(cache.δ,_vec(v) .+ _vec(cache.a) ./ 2)
         @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
         fu_new = f(u .+ δ, p)
         cache.stats.nf += 1
@@ -327,7 +327,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
                 return nothing
             end
             cache.fu1 = fu_new
-            cache.v_old = v
+            cache.v_old = _restructure(cache.v_old,v)
             cache.norm_v_old = norm_v
             cache.loss_old = loss
             cache.λ_factor = 1 / cache.damping_decrease_factor

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
@@ -347,7 +347,7 @@ function perform_step!(cache::TrustRegionCache{true})
     if cache.make_new_J
         jacobian!!(J, cache)
         mul!(cache.H, J', J)
-        mul!(cache.g, J', fu)
+        mul!(_vec(cache.g), J', _vec(fu))
         cache.stats.njacs += 1
 
         # do not use A = cache.H, b = _vec(cache.g) since it is equivalent
@@ -378,9 +378,9 @@ function perform_step!(cache::TrustRegionCache{false})
     if make_new_J
         J = jacobian!!(cache.J, cache)
         cache.H = J' * J
-        cache.g = J' * fu
+        cache.g = _restructure(fu, J' * _vec(fu))
         cache.stats.njacs += 1
-        cache.u_gauss_newton = -1 .* (cache.H \ cache.g)
+        cache.u_gauss_newton = -1 .* _restructure(cache.g, cache.H \ _vec(cache.g))
     end
 
     # Compute the Newton step.
@@ -419,7 +419,7 @@ function trust_region_step!(cache::TrustRegionCache)
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
-    cache.r = -(loss - cache.loss_new) / (dot(du, g) + dot(du, H, du) / 2)
+    cache.r = -(loss - cache.loss_new) / (dot(_vec(du), _vec(g)) + dot(_vec(du), H, _vec(du)) / 2)
     @unpack r = cache
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
@@ -597,7 +597,7 @@ function dogleg!(cache::TrustRegionCache{true})
 
     # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
     l_grad = norm(cache.g) # length of the gradient
-    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    d_cauchy = l_grad^3 / dot(_vec(cache.g), cache.H, _vec(cache.g)) # distance of the cauchy point from the current iterate
     if d_cauchy >= trust_r
         @. cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
         return
@@ -627,7 +627,7 @@ function dogleg!(cache::TrustRegionCache{false})
 
     ## Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
     l_grad = norm(cache.g)
-    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    d_cauchy = l_grad^3 / dot(_vec(cache.g), cache.H, _vec(cache.g)) # distance of the cauchy point from the current iterate
     if d_cauchy > trust_r # cauchy point lies outside of trust region
         cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
         return

diff --git a/src/utils.jl b/src/utils.jl
@@ -74,6 +74,9 @@ end
 @inline _vec(v::Number) = v
 @inline _vec(v::AbstractVector) = v
 
+@inline _restructure(y,x) = restructure(y,x)
+@inline _restructure(y::Number,x::Number) = x
+
 DEFAULT_PRECS(W, du, u, p, t, newW, Plprev, Prprev, cachedata) = nothing, nothing
 
 function dolinsolve(precs::P, linsolve; A = nothing, linu = nothing, b = nothing,

diff --git a/test/matrix_resizing.jl b/test/matrix_resizing.jl
@@ -0,0 +1,21 @@
+using NonlinearSolve, Test
+
+ff(u, p) = u .* u .- p
+u0 = rand(2,2)
+p = 2.0
+vecprob = NonlinearProblem(ff, vec(u0), p)
+prob = NonlinearProblem(ff, u0, p)
+
+for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg())
+    @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
+end
+
+fiip(du, u, p) = (du .= u .* u .- p)
+u0 = rand(2,2)
+p = 2.0
+vecprob = NonlinearProblem(fiip, vec(u0), p)
+prob = NonlinearProblem(fiip, u0, p)
+
+for alg in (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(), RobustMultiNewton(), FastShortcutNonlinearPolyalg())
+    @test vec(solve(prob, alg).u) == solve(vecprob, alg).u
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -16,6 +16,7 @@ end
         @time @safetestset "Basic Tests + Some AD" include("basictests.jl")
         @time @safetestset "Sparsity Tests" include("sparse.jl")
         @time @safetestset "Polyalgs" include("polyalgs.jl")
+        @time @safetestset "Matrix Resizing" include("matrix_resizing.jl")
         @time @safetestset "Nonlinear Least Squares" include("nonlinear_least_squares.jl")
     end