diff --git a/Project.toml b/Project.toml index 64a483fc..667922e4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "BoundaryValueDiffEq" uuid = "764a87c0-6b3e-53db-9096-fe964310641d" -version = "5.9.1" +version = "5.10.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -51,7 +51,7 @@ LinearSolve = "2.21" Logging = "1.10" NonlinearSolve = "3.8.1" ODEInterface = "0.5" -OrdinaryDiffEq = "6.88.1" +OrdinaryDiffEq = "6.89.0" PreallocationTools = "0.4.24" PrecompileTools = "1.2" Preferences = "1.4" diff --git a/README.md b/README.md index a2d99f1d..c4c8cce8 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Precompilation can be controlled via `Preferences.jl` - `PrecompileMIRK` -- Precompile the MIRK2 - MIRK6 algorithms (default: `true`). - `PrecompileShooting` -- Precompile the single shooting algorithms (default: `true`). - `PrecompileMultipleShooting` -- Precompile the multiple shooting algorithms (default: `true`). - - `PrecompileMIRKNLLS` -- Precompile the MIRK2 - MIRK6 algorithms for under-determined and over-determined BVPs (default: `true`). + - `PrecompileMIRKNLLS` -- Precompile the MIRK2 - MIRK6 algorithms for under-determined and over-determined BVPs (default: `false`). - `PrecompileShootingNLLS` -- Precompile the single shooting algorithms for under-determined and over-determined BVPs (default: `true`). - `PrecompileMultipleShootingNLLS` -- Precompile the multiple shooting algorithms for under-determined and over-determined BVPs (default: `true` ). diff --git a/src/BoundaryValueDiffEq.jl b/src/BoundaryValueDiffEq.jl index cb6055b1..6cbf6df9 100644 --- a/src/BoundaryValueDiffEq.jl +++ b/src/BoundaryValueDiffEq.jl @@ -95,179 +95,6 @@ end Threads.@spawn solve(prob, alg; dt = 0.2) end end - - f1_nlls! = (du, u, p, t) -> begin - du[1] = u[2] - du[2] = -u[1] - end - - f1_nlls = (u, p, t) -> [u[2], -u[1]] - - bc1_nlls! = (resid, sol, p, t) -> begin - solₜ₁ = sol[:, 1] - solₜ₂ = sol[:, end] - resid[1] = solₜ₁[1] - resid[2] = solₜ₂[1] - 1 - resid[3] = solₜ₂[2] + 1.729109 - return nothing - end - bc1_nlls = (sol, p, t) -> [sol[:, 1][1], sol[:, end][1] - 1, sol[:, end][2] + 1.729109] - - bc1_nlls_a! = (resid, ua, p) -> (resid[1] = ua[1]) - bc1_nlls_b! = (resid, ub, p) -> (resid[1] = ub[1] - 1; - resid[2] = ub[2] + 1.729109) - - bc1_nlls_a = (ua, p) -> [ua[1]] - bc1_nlls_b = (ub, p) -> [ub[1] - 1, ub[2] + 1.729109] - - tspan = (0.0, 100.0) - u0 = [0.0, 1.0] - bcresid_prototype1 = Array{Float64}(undef, 3) - bcresid_prototype2 = (Array{Float64}(undef, 1), Array{Float64}(undef, 2)) - - probs = [ - BVProblem(BVPFunction(f1_nlls!, bc1_nlls!; bcresid_prototype = bcresid_prototype1), - u0, tspan, nlls = Val(true)), - BVProblem(BVPFunction(f1_nlls, bc1_nlls; bcresid_prototype = bcresid_prototype1), - u0, tspan, nlls = Val(true)), - TwoPointBVProblem(f1_nlls!, (bc1_nlls_a!, bc1_nlls_b!), u0, tspan; - bcresid_prototype = bcresid_prototype2, nlls = Val(true)), - TwoPointBVProblem(f1_nlls, (bc1_nlls_a, bc1_nlls_b), u0, tspan; - bcresid_prototype = bcresid_prototype2, nlls = Val(true))] - - jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)) - - nlsolvers = [LevenbergMarquardt(; disable_geodesic = Val(true)), GaussNewton()] - - algs = [] - - if Preferences.@load_preference("PrecompileMIRKNLLS", false) - for nlsolve in nlsolvers - append!(algs, [MIRK2(; jac_alg, nlsolve), MIRK6(; jac_alg, nlsolve)]) - end - end - - @compile_workload begin - @sync for prob in probs, alg in algs - Threads.@spawn solve(prob, alg; dt = 0.2, abstol = 1e-2) - end - end - - function bc2!(residual, u, p, t) - residual[1] = u(0.0)[1] - 5 - residual[2] = u(5.0)[1] - end - bc2 = (u, p, t) -> [u(0.0)[1] - 5, u(5.0)[1]] - - tspan = (0.0, 5.0) - u0 = [5.0, -3.5] - bcresid_prototype = (Array{Float64}(undef, 1), Array{Float64}(undef, 1)) - - probs = [BVProblem(BVPFunction{true}(f1!, bc2!), u0, tspan; nlls = Val(false)), - BVProblem(BVPFunction{false}(f1, bc2), u0, tspan; nlls = Val(false)), - BVProblem( - BVPFunction{true}( - f1!, (bc1_a!, bc1_b!); bcresid_prototype, twopoint = Val(true)), - u0, - tspan; - nlls = Val(false)), - BVProblem( - BVPFunction{false}(f1, (bc1_a, bc1_b); bcresid_prototype, twopoint = Val(true)), - u0, tspan; nlls = Val(false))] - - algs = [] - - if @load_preference("PrecompileShooting", true) - push!(algs, - Shooting(Tsit5(); nlsolve = NewtonRaphson(), - jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)))) - end - - if @load_preference("PrecompileMultipleShooting", true) - push!(algs, - MultipleShooting(10, - Tsit5(); - nlsolve = NewtonRaphson(), - jac_alg = BVPJacobianAlgorithm(; - bc_diffmode = AutoForwardDiff(; chunksize = 2), - nonbc_diffmode = AutoSparse(AutoForwardDiff(; chunksize = 2))))) - end - - @compile_workload begin - @sync for prob in probs, alg in algs - Threads.@spawn solve(prob, alg) - end - end - - bc1_nlls! = (resid, sol, p, t) -> begin - solₜ₁ = sol(0.0) - solₜ₂ = sol(100.0) - resid[1] = solₜ₁[1] - resid[2] = solₜ₂[1] - 1 - resid[3] = solₜ₂[2] + 1.729109 - return nothing - end - bc1_nlls = (sol, p, t) -> [sol(0.0)[1], sol(100.0)[1] - 1, sol(1.0)[2] + 1.729109] - - tspan = (0.0, 100.0) - u0 = [0.0, 1.0] - bcresid_prototype1 = Array{Float64}(undef, 3) - bcresid_prototype2 = (Array{Float64}(undef, 1), Array{Float64}(undef, 2)) - - probs = [ - BVProblem( - BVPFunction{true}(f1_nlls!, bc1_nlls!; bcresid_prototype = bcresid_prototype1), - u0, tspan; nlls = Val(true)), - BVProblem( - BVPFunction{false}(f1_nlls, bc1_nlls; bcresid_prototype = bcresid_prototype1), - u0, tspan; nlls = Val(true)), - BVProblem( - BVPFunction{true}(f1_nlls!, (bc1_nlls_a!, bc1_nlls_b!); - bcresid_prototype = bcresid_prototype2, twopoint = Val(true)), - u0, - tspan; - nlls = Val(true)), - BVProblem( - BVPFunction{false}(f1_nlls, (bc1_nlls_a, bc1_nlls_b); - bcresid_prototype = bcresid_prototype2, twopoint = Val(true)), - u0, - tspan; - nlls = Val(true))] - - algs = [] - - if @load_preference("PrecompileShootingNLLS", true) - append!(algs, - [ - Shooting( - Tsit5(); nlsolve = LevenbergMarquardt(; disable_geodesic = Val(true)), - jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2))), - Shooting(Tsit5(); nlsolve = GaussNewton(), - jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2)))]) - end - - if @load_preference("PrecompileMultipleShootingNLLS", true) - append!(algs, - [ - MultipleShooting(10, - Tsit5(); - nlsolve = LevenbergMarquardt(; disable_geodesic = Val(true)), - jac_alg = BVPJacobianAlgorithm(; - bc_diffmode = AutoForwardDiff(; chunksize = 2), - nonbc_diffmode = AutoSparse(AutoForwardDiff(; chunksize = 2)))), - MultipleShooting(10, - Tsit5(); - nlsolve = GaussNewton(), - jac_alg = BVPJacobianAlgorithm(; - bc_diffmode = AutoForwardDiff(; chunksize = 2), - nonbc_diffmode = AutoSparse(AutoForwardDiff(; chunksize = 2))))]) - end - - @compile_workload begin - @sync for prob in probs, alg in algs - Threads.@spawn solve(prob, alg; nlsolve_kwargs = (; abstol = 1e-2)) - end - end end export Shooting, MultipleShooting diff --git a/src/adaptivity.jl b/src/adaptivity.jl index 89a58a3f..aabfe5f4 100644 --- a/src/adaptivity.jl +++ b/src/adaptivity.jl @@ -1,5 +1,7 @@ """ - interp_eval!(y::AbstractArray, cache::MIRKCache, t) + interp_eval!(y::AbstractArray, cache::MIRKCache, t, mesh, mesh_dt) + interp_eval!(y::AbstractArray, cache::FIRKCacheExpand, t, mesh, mesh_dt) + interp_eval!(y::AbstractArray, cache::FIRKCacheNested, t, mesh, mesh_dt) After we construct an interpolant, we use interp_eval to evaluate it. """ @@ -14,13 +16,6 @@ end @views function interp_eval!( y::AbstractArray, cache::FIRKCacheExpand{iip}, t, mesh, mesh_dt) where {iip} - i = findfirst(x -> x == y, cache.y₀.u) - interp_eval!(cache.y₀.u, i, cache::FIRKCacheExpand{iip}, t, mesh, mesh_dt) - return y -end - -@views function interp_eval!( - y::AbstractArray, i::Int, cache::FIRKCacheExpand{iip}, t, mesh, mesh_dt) where {iip} j = interval(mesh, t) h = mesh_dt[j] lf = (length(cache.y₀) - 1) / (length(cache.y) - 1) # Cache length factor. We use a h corresponding to cache.y. Note that this assumes equidistributed mesh @@ -29,12 +24,11 @@ end end τ = (t - mesh[j]) - (; f, M, p, ITU) = cache - (; q_coeff, stage) = ITU + (; f, M, stage, p, ITU) = cache + (; q_coeff) = ITU K = __similar(cache.y[1].du, M, stage) - ctr_y0 = (i - 1) * (stage + 1) + 1 ctr_y = (j - 1) * (stage + 1) + 1 yᵢ = cache.y[ctr_y].du @@ -130,6 +124,34 @@ function dS_interpolate!(dy::AbstractArray, t, S_coeffs) dy .= S_coeffs * ts end +""" + s_constraints(M, h) + +Form the quartic interpolation constraint matrix, see bvp5c paper. +""" +function s_constraints(M, h) + t = vec(repeat([0.0, 1.0 * h, 0.5 * h, 0.0, 1.0 * h, 0.5 * h], 1, M)) + A = zeros(6 * M, 6 * M) + for i in 1:6 + row_start = (i - 1) * M + 1 + for k in 0:(M - 1) + for j in 1:6 + A[row_start + k, j + k * 6] = t[i + k * 6]^(j - 1) + end + end + end + for i in 4:6 + row_start = (i - 1) * M + 1 + for k in 0:(M - 1) + for j in 1:6 + A[row_start + k, j + k * 6] = j == 1.0 ? 0.0 : + (j - 1) * t[i + k * 6]^(j - 2) + end + end + end + return A +end + """ interval(mesh, t) @@ -141,6 +163,8 @@ end """ mesh_selector!(cache::MIRKCache) + mesh_selector!(cache::FIRKCacheExpand) + mesh_selector!(cache::FIRKCacheNested) Generate new mesh based on the defect. """ @@ -199,6 +223,8 @@ end """ redistribute!(cache::MIRKCache, Nsub_star, ŝ, mesh, mesh_dt) + redistribute!(cache::FIRKCacheExpand, Nsub_star, ŝ, mesh, mesh_dt) + redistribute!(cache::FIRKCacheNested, Nsub_star, ŝ, mesh, mesh_dt) Generate a new mesh based on the `ŝ`. """ @@ -235,6 +261,8 @@ end """ half_mesh!(mesh, mesh_dt) half_mesh!(cache::MIRKCache) + half_mesh!(cache::FIRKCacheExpand) + half_mesh!(cache::FIRKCacheNested) The input mesh has length of `n + 1`. Divide the original subinterval into two equal length subinterval. The `mesh` and `mesh_dt` are modified in place. @@ -260,6 +288,8 @@ end """ defect_estimate!(cache::MIRKCache) + defect_estimate!(cache::FIRKCacheExpand) + defect_estimate!(cache::FIRKCacheNested) defect_estimate use the discrete solution approximation Y, plus stages of the RK method in 'k_discrete', plus some new stages in 'k_interp' to construct diff --git a/src/algorithms.jl b/src/algorithms.jl index d1a6cc13..4b953ccb 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -224,7 +224,7 @@ for stage in (1, 2, 3, 5, 7) implicit FIRK step. Defaults to `false`. If set to `false`, the FIRK stages are solved as a part of the global residual. The general recommendation is to choose `true` for larger problems and `false` for smaller ones. - - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to + - `nest_tol`: The tolerance for the nested solver. Default is nothing which leads to `NonlinearSolve` automatically selecting the tolerance. - `defect_threshold`: Threshold for defect control. - `max_num_subintervals`: Number of maximal subintervals, default as 3000. @@ -277,11 +277,11 @@ for stage in (1, 2, 3, 5, 7) nlsolve::N = nothing jac_alg::J = BVPJacobianAlgorithm() nested_nlsolve::Bool = false - nest_tol::Number = 0.0 + nest_tol::Union{Number, Nothing} = nothing defect_threshold::T = 0.1 max_num_subintervals::Int = 3000 end - $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ + $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Union{Number, Nothing} = nothing, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ N, J, T}( nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals) end @@ -315,7 +315,7 @@ for stage in (2, 3, 4, 5) implicit FIRK step. Defaults to `false`. If set to `false`, the FIRK stages are solved as a part of the global residual. The general recommendation is to choose `true` for larger problems and `false` for smaller ones. - - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to + - `nest_tol`: The tolerance for the nested solver. Default is nothing which leads to `NonlinearSolve` automatically selecting the tolerance. - `defect_threshold`: Threshold for defect control. - `max_num_subintervals`: Number of maximal subintervals, default as 3000. @@ -369,11 +369,11 @@ for stage in (2, 3, 4, 5) nlsolve::N = nothing jac_alg::J = BVPJacobianAlgorithm() nested_nlsolve::Bool = false - nest_tol::Number = 0.0 + nest_tol::Union{Number, Nothing} = nothing defect_threshold::T = 0.1 max_num_subintervals::Int = 3000 end - $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ + $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Union{Number, Nothing} = nothing, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ N, J, T}( nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals) end @@ -407,7 +407,7 @@ for stage in (2, 3, 4, 5) implicit FIRK step. Defaults to `true`. If set to `false`, the FIRK stages are solved as a part of the global residual. The general recommendation is to choose `true` for larger problems and `false` for smaller ones. - - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to + - `nest_tol`: The tolerance for the nested solver. Default is nothing which leads to `NonlinearSolve` automatically selecting the tolerance. - `defect_threshold`: Threshold for defect control. - `max_num_subintervals`: Number of maximal subintervals, default as 3000. @@ -461,11 +461,11 @@ for stage in (2, 3, 4, 5) nlsolve::N = nothing jac_alg::J = BVPJacobianAlgorithm() nested_nlsolve::Bool = false - nest_tol::Number = 0.0 + nest_tol::Union{Number, Nothing} = nothing defect_threshold::T = 0.1 max_num_subintervals::Int = 3000 end - $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ + $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Union{Number, Nothing} = nothing, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ N, J, T}( nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals) end @@ -499,7 +499,7 @@ for stage in (2, 3, 4, 5) implicit FIRK step. Defaults to `true`. If set to `false`, the FIRK stages are solved as a part of the global residual. The general recommendation is to choose `true` for larger problems and `false` for smaller ones. - - `nest_tol`: The tolerance for the nested solver. Default is 0.0 which leads to + - `nest_tol`: The tolerance for the nested solver. Default is nothing which leads to `NonlinearSolve` automatically selecting the tolerance. - `defect_threshold`: Threshold for defect control. - `max_num_subintervals`: Number of maximal subintervals, default as 3000. @@ -553,11 +553,11 @@ for stage in (2, 3, 4, 5) nlsolve::N = nothing jac_alg::J = BVPJacobianAlgorithm() nested_nlsolve::Bool = false - nest_tol::Number = 0.0 + nest_tol::Union{Number, Nothing} = nothing defect_threshold::T = 0.1 max_num_subintervals::Int = 3000 end - $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Number = 0.0, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ + $(alg)(nlsolve::N, jac_alg::J; nested = false, nest_tol::Union{Number, Nothing} = nothing, defect_threshold::T = 0.1, max_num_subintervals::Int = 3000) where {N, J, T} = $(alg){ N, J, T}( nlsolve, jac_alg, nested, nest_tol, defect_threshold, max_num_subintervals) end diff --git a/src/interpolation.jl b/src/interpolation.jl index 6fe00495..876f9981 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -192,7 +192,7 @@ end for j in idx z = similar(cache.fᵢ₂_cache) - interp_eval!(z, j, id.cache, tvals[j], id.cache.mesh, id.cache.mesh_dt) + interp_eval!(z, id.cache, tvals[j], id.cache.mesh, id.cache.mesh_dt) vals[j] = z end end @@ -200,7 +200,7 @@ end @inline function interpolation(tval::Number, id::FIRKExpandInterpolation, idxs, deriv::D, p, continuity::Symbol = :left) where {D} z = similar(id.cache.fᵢ₂_cache) - interp_eval!(z, 1, id.cache, tval, id.cache.mesh, id.cache.mesh_dt) + interp_eval!(z, id.cache, tval, id.cache.mesh, id.cache.mesh_dt) return idxs !== nothing ? z[idxs] : z end @@ -210,44 +210,3 @@ end cache.mesh, u, cache) @inline __build_interpolation(cache::FIRKCacheNested, u::AbstractVector) = FIRKNestedInterpolation( cache.mesh, u, cache) - -""" - get_ymid(yᵢ, coeffs, K, h) - -Gets the interpolated middle value for a RK method, see bvp5c paper. -""" -function get_ymid(yᵢ, coeffs, K, h) - res = copy(yᵢ) - for i in axes(K, 2) - res .+= h .* K[:, i] .* coeffs[i] - end - return res -end - -""" - s_constraints(M, h) - -Form the quartic interpolation constraint matrix, see bvp5c paper. -""" -function s_constraints(M, h) - t = vec(repeat([0.0, 1.0 * h, 0.5 * h, 0.0, 1.0 * h, 0.5 * h], 1, M)) - A = zeros(6 * M, 6 * M) - for i in 1:6 - row_start = (i - 1) * M + 1 - if i <= 3 - for k in 0:(M - 1) - for j in 1:6 - A[row_start + k, j + k * 6] = t[i + k * 6]^(j - 1) - end - end - else - for k in 0:(M - 1) - for j in 1:6 - A[row_start + k, j + k * 6] = j == 1.0 ? 0.0 : - (j - 1) * t[i + k * 6]^(j - 2) - end - end - end - end - return A -end diff --git a/src/solve/firk.jl b/src/solve/firk.jl index 3cb7851d..47e6d1a7 100644 --- a/src/solve/firk.jl +++ b/src/solve/firk.jl @@ -62,27 +62,19 @@ Base.eltype(::FIRKCacheExpand{iip, T}) where {iip, T} = T function extend_y(y, N::Int, stage::Int) y_extended = similar(y.u, (N - 1) * (stage + 1) + 1) - y_extended[1] = y.u[1] - let ctr1 = 2 - for i in 2:N - for j in 1:(stage + 1) - y_extended[(ctr1)] = y.u[i] - ctr1 += 1 - end - end + for (i, ctr) in enumerate(2:(stage + 1):((N - 1) * (stage + 1) + 1)) + @views fill!(y_extended[ctr:(ctr + stage)], y.u[i + 1]) end + y_extended[1] = y.u[1] return VectorOfArray(y_extended) end -function shrink_y(y, N, M, stage) +function shrink_y(y, N, stage) y_shrink = similar(y, N) - y_shrink[1] = y[1] - let ctr = stage + 2 - for i in 2:N - y_shrink[i] = y[ctr] - ctr += (stage + 1) - end + for (i, ctr) in enumerate((stage + 2):(stage + 1):((N - 1) * (stage + 1) + 1)) + y_shrink[i + 1] = y[ctr] end + y_shrink[1] = y[1] return y_shrink end @@ -180,7 +172,7 @@ function init_nested(prob::BVProblem, alg::AbstractFIRK; dt = 0.0, K0 = repeat(avg_u0, 1, stage) # Somewhat arbitrary initialization of K nestprob_p = zeros(T, M + 2) - nest_tol = (alg.nest_tol > eps(T) ? alg.nest_tol : nothing) + nest_tol = alg.nest_tol if iip nestprob = NonlinearProblem( @@ -310,15 +302,11 @@ function SciMLBase.solve!(cache::FIRKCacheExpand) end end - # sync y and y0 caches - for i in axes(cache.y₀, 1) - cache.y[i].du .= cache.y₀.u[i] - end - - u = shrink_y([reshape(y, cache.in_size) for y in cache.y₀], - length(cache.mesh), cache.M, alg_stage(cache.alg)) + u = shrink_y( + [reshape(y, cache.in_size) for y in cache.y₀], length(cache.mesh), cache.stage) interpolation = __build_interpolation(cache, u) + odesol = DiffEqBase.build_solution( cache.prob, cache.alg, cache.mesh, u; interp = interpolation, retcode = info) return __build_solution(cache.prob, odesol, sol_nlprob) @@ -377,13 +365,12 @@ function __construct_nlproblem( cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF, ::StandardBVProblem) where {iip, BC, C, LF} (; nlsolve, jac_alg) = cache.alg + (; stage) = cache N = length(cache.mesh) - TU, ITU = constructRK(cache.alg, eltype(y)) - (; s) = TU resid_bc = cache.bcresid_prototype L = length(resid_bc) - resid_collocation = __similar(y, cache.M * (N - 1) * (TU.s + 1)) + resid_collocation = __similar(y, cache.M * (N - 1) * (stage + 1)) loss_bcₚ = (iip ? __Fix3 : Base.Fix2)(loss_bc, cache.p) loss_collocationₚ = (iip ? __Fix3 : Base.Fix2)(loss_collocation, cache.p) @@ -403,10 +390,10 @@ function __construct_nlproblem( sparse(colored_matrix.M), colored_matrix.row_colorvec, colored_matrix.col_colorvec)) else - block_size = cache.M * (s + 2) + block_size = cache.M * (stage + 2) J_full_band = BandedMatrix( - Ones{eltype(y)}( - L + cache.M * (s + 1) * (N - 1), cache.M * (s + 1) * (N - 1) + cache.M), + Ones{eltype(y)}(L + cache.M * (stage + 1) * (N - 1), + cache.M * (stage + 1) * (N - 1) + cache.M), (block_size, block_size)) __sparsity_detection_alg(__generate_sparse_jacobian_prototype( cache, cache.problem_type, y, y, cache.M, N)) @@ -451,13 +438,12 @@ function __construct_nlproblem( cache::FIRKCacheExpand{iip}, y, loss_bc::BC, loss_collocation::C, loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF} (; nlsolve, jac_alg) = cache.alg + (; stage) = cache N = length(cache.mesh) - TU, ITU = constructRK(cache.alg, eltype(y)) - (; s) = TU lossₚ = iip ? ((du, u) -> loss(du, u, cache.p)) : (u -> loss(u, cache.p)) - resid_collocation = __similar(y, cache.M * (N - 1) * (TU.s + 1)) + resid_collocation = __similar(y, cache.M * (N - 1) * (stage + 1)) resid = vcat( @view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]), resid_collocation, @@ -465,10 +451,10 @@ function __construct_nlproblem( L = length(cache.bcresid_prototype) sd = if jac_alg.nonbc_diffmode isa AutoSparse - block_size = cache.M * (s + 2) + block_size = cache.M * (stage + 2) J_full_band = BandedMatrix( - Ones{eltype(y)}( - L + cache.M * (s + 1) * (N - 1), cache.M * (s + 1) * (N - 1) + cache.M), + Ones{eltype(y)}(L + cache.M * (stage + 1) * (N - 1), + cache.M * (stage + 1) * (N - 1) + cache.M), (block_size, block_size)) __sparsity_detection_alg(__generate_sparse_jacobian_prototype( cache, cache.problem_type, diff --git a/src/solve/mirk.jl b/src/solve/mirk.jl index e84a001f..15291dc4 100644 --- a/src/solve/mirk.jl +++ b/src/solve/mirk.jl @@ -111,6 +111,8 @@ end """ __expand_cache!(cache::MIRKCache) + __expand_cache!(cache::FIRKCacheNested) + __expand_cache!(cache::MIRKCacheExpand) After redistributing or halving the mesh, this function expands the required vectors to match the length of the new mesh. diff --git a/src/sparse_jacobians.jl b/src/sparse_jacobians.jl index 016b3790..7f8b0d88 100644 --- a/src/sparse_jacobians.jl +++ b/src/sparse_jacobians.jl @@ -75,23 +75,23 @@ end function __generate_sparse_jacobian_prototype( cache::FIRKCacheExpand, ::StandardBVProblem, ya, yb, M, N) - (; TU) = cache - (; s) = TU + (; stage) = cache + # Get number of nonzeros - block_size = M * (s + 1) * M * (s + 2) + block_size = M * (stage + 1) * M * (stage + 2) l = (N - 1) * block_size # Initialize Is and Js Is = Vector{Int}(undef, l) Js = Vector{Int}(undef, l) # Fill Is and Js - row_size = M * (s + 1) * (N - 1) + row_size = M * (stage + 1) * (N - 1) idx = 1 i_start = 0 j_start = 0 - i_step = M * (s + 1) - j_step = M * (s + 2) + i_step = M * (stage + 1) + j_step = M * (stage + 2) for k in 1:(N - 1) for i in 1:i_step for j in 1:j_step @@ -109,11 +109,11 @@ function __generate_sparse_jacobian_prototype( col_colorvec = Vector{Int}(undef, size(J_c, 2)) for i in eachindex(col_colorvec) - col_colorvec[i] = mod1(i, (2 * M * (s + 1)) + M) + col_colorvec[i] = mod1(i, (2 * M * (stage + 1)) + M) end row_colorvec = Vector{Int}(undef, size(J_c, 1)) for i in eachindex(row_colorvec) - row_colorvec[i] = mod1(i, (2 * M * (s + 1)) + M) + row_colorvec[i] = mod1(i, (2 * M * (stage + 1)) + M) end return ColoredMatrix(J_c, row_colorvec, col_colorvec) @@ -121,22 +121,22 @@ end function __generate_sparse_jacobian_prototype( cache::FIRKCacheExpand, ::TwoPointBVProblem, ya, yb, M, N) - (; TU) = cache - (; s) = TU + (; stage) = cache + # Get number of nonzeros - block_size = M * (s + 1) * M * (s + 2) - l = (N - 1) * block_size + M * (s + 2) * (length(ya) + length(yb)) + block_size = M * (stage + 1) * M * (stage + 2) + l = (N - 1) * block_size + M * (stage + 2) * (length(ya) + length(yb)) # Initialize Is and Js Is = Vector{Int}(undef, l) Js = Vector{Int}(undef, l) # Fill Is and Js - row_size = M * (s + 1) * (N - 1) + row_size = M * (stage + 1) * (N - 1) idx = 1 i_start = 0 j_start = 0 - i_step = M * (s + 1) - j_step = M * (s + 2) + i_step = M * (stage + 1) + j_step = M * (stage + 2) # Fill first rows for i in 1:length(ya) diff --git a/test/firk/expanded/nlls_tests.jl b/test/firk/expanded/nlls_tests.jl index bca1e822..5206d933 100644 --- a/test/firk/expanded/nlls_tests.jl +++ b/test/firk/expanded/nlls_tests.jl @@ -4,7 +4,7 @@ using BoundaryValueDiffEq, LinearAlgebra SOLVERS = [firk(; nlsolve) for firk in (RadauIIa5, LobattoIIIa4, LobattoIIIb4, LobattoIIIc4), -nlsolve in (LevenbergMarquardt(), GaussNewton(), TrustRegion())] +nlsolve in (LevenbergMarquardt(), GaussNewton())]#, TrustRegion())] SOLVERS_NAMES = ["$solver with $nlsolve" for solver in ["RadauIIa5", "LobattoIIIa4", "LobattoIIIb4", "LobattoIIIc4"],