diff --git a/Project.toml b/Project.toml
index 604c8835..4da652ec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "BoundaryValueDiffEq"
 uuid = "764a87c0-6b3e-53db-9096-fe964310641d"
-version = "5.3.0"
+version = "5.4.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -11,6 +11,7 @@ ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
 NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 PreallocationTools = "d236fae5-4411-538c-8e31-a6e3d9e00b46"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
@@ -41,8 +42,9 @@ BandedMatrices = "1"
 ConcreteStructs = "0.2"
 DiffEqBase = "6.135"
 ForwardDiff = "0.10"
-LinearAlgebra = "1.6"
-NonlinearSolve = "2.5"
+LinearAlgebra = "1.9"
+LinearSolve = "2"
+NonlinearSolve = "2.6.1"
 ODEInterface = "0.5"
 OrdinaryDiffEq = "6"
 PreallocationTools = "0.4"
@@ -52,7 +54,7 @@ RecursiveArrayTools = "2.38.10"
 Reexport = "0.2, 1.0"
 SciMLBase = "2.5"
 Setfield = "1"
-SparseArrays = "1.6"
+SparseArrays = "1.9"
 SparseDiffTools = "2.9"
 TruncatedStacktraces = "1"
 UnPack = "1"
@@ -61,6 +63,7 @@ julia = "1.9"
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d"
+LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
 ODEInterface = "54ca160b-1b9f-5127-a996-1867f4bc2a2c"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -69,4 +72,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["StaticArrays", "Random", "DiffEqDevTools", "OrdinaryDiffEq", "Test", "SafeTestsets", "ODEInterface", "Aqua"]
+test = ["StaticArrays", "Random", "DiffEqDevTools", "OrdinaryDiffEq", "Test", "SafeTestsets", "ODEInterface", "Aqua", "LinearSolve"]
diff --git a/README.md b/README.md
index c5bf1f83..9f114c6e 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,9 @@ Precompilation can be controlled via `Preferences.jl`
 - `PrecompileMIRK` -- Precompile the MIRK2 - MIRK6 algorithms (default: `true`).
 - `PrecompileShooting` -- Precompile the single shooting algorithms (default: `true`). This is triggered when `OrdinaryDiffEq` is loaded.
 - `PrecompileMultipleShooting` -- Precompile the multiple shooting algorithms (default: `true`). This is triggered when `OrdinaryDiffEq` is loaded.
+- `PrecompileMIRKNLLS` -- Precompile the MIRK2 - MIRK6 algorithms for under-determined and over-determined BVPs (default: `true` on Julia Version 1.10 and above).
+- `PrecompileShootingNLLS` -- Precompile the single shooting algorithms for under-determined and over-determined BVPs (default: `true` on Julia Version 1.10 and above). This is triggered when `OrdinaryDiffEq` is loaded.
+- `PrecompileMultipleShootingNLLS` -- Precompile the multiple shooting algorithms for under-determined and over-determined BVPs (default: `true` on Julia Version 1.10 and above). This is triggered when `OrdinaryDiffEq` is loaded.
 
 To set these preferences before loading the package, do the following (replacing `PrecompileShooting` with the preference you want to set, or pass in multiple pairs to set them together):
 
diff --git a/ext/BoundaryValueDiffEqOrdinaryDiffEqExt.jl b/ext/BoundaryValueDiffEqOrdinaryDiffEqExt.jl
index 2fcf69a4..353a5efc 100644
--- a/ext/BoundaryValueDiffEqOrdinaryDiffEqExt.jl
+++ b/ext/BoundaryValueDiffEqOrdinaryDiffEqExt.jl
@@ -63,6 +63,81 @@ end
             solve(prob, alg)
         end
     end
+
+    function f1_nlls!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -u[1]
+    end
+
+    f1_nlls(u, p, t) = [u[2], -u[1]]
+
+    function bc1_nlls!(resid, sol, p, t)
+        solₜ₁ = sol(0.0)
+        solₜ₂ = sol(100.0)
+        resid[1] = solₜ₁[1]
+        resid[2] = solₜ₂[1] - 1
+        resid[3] = solₜ₂[2] + 1.729109
+        return nothing
+    end
+    bc1_nlls(sol, p, t) = [sol(0.0)[1], sol(100.0)[1] - 1, sol(100.0)[2] + 1.729109]
+
+    bc1_nlls_a!(resid, ua, p) = (resid[1] = ua[1])
+    bc1_nlls_b!(resid, ub, p) = (resid[1] = ub[1] - 1; resid[2] = ub[2] + 1.729109)
+
+    bc1_nlls_a(ua, p) = [ua[1]]
+    bc1_nlls_b(ub, p) = [ub[1] - 1, ub[2] + 1.729109]
+
+    tspan = (0.0, 100.0)
+    u0 = [0.0, 1.0]
+    bcresid_prototype1 = Array{Float64}(undef, 3)
+    bcresid_prototype2 = (Array{Float64}(undef, 1), Array{Float64}(undef, 2))
+
+    probs = [
+        BVProblem(BVPFunction(f1_nlls!, bc1_nlls!; bcresid_prototype = bcresid_prototype1),
+            u0, tspan),
+        BVProblem(BVPFunction(f1_nlls, bc1_nlls; bcresid_prototype = bcresid_prototype1),
+            u0, tspan),
+        TwoPointBVProblem(f1_nlls!, (bc1_nlls_a!, bc1_nlls_b!), u0, tspan;
+            bcresid_prototype = bcresid_prototype2),
+        TwoPointBVProblem(f1_nlls, (bc1_nlls_a, bc1_nlls_b), u0, tspan;
+            bcresid_prototype = bcresid_prototype2),
+    ]
+
+    algs = []
+
+    if @load_preference("PrecompileShootingNLLS", VERSION≥v"1.10-")
+        append!(algs,
+            [
+                Shooting(Tsit5();
+                    nlsolve = LevenbergMarquardt(;
+                        autodiff = AutoForwardDiff(chunksize = 2))),
+                Shooting(Tsit5();
+                    nlsolve = GaussNewton(; autodiff = AutoForwardDiff(chunksize = 2))),
+            ])
+    end
+
+    if @load_preference("PrecompileMultipleShootingNLLS", VERSION≥v"1.10-")
+        append!(algs,
+            [
+                MultipleShooting(10, Tsit5();
+                    nlsolve = LevenbergMarquardt(;
+                        autodiff = AutoForwardDiff(chunksize = 2)),
+                    jac_alg = BVPJacobianAlgorithm(;
+                        bc_diffmode = AutoForwardDiff(; chunksize = 2),
+                        nonbc_diffmode = AutoSparseForwardDiff(; chunksize = 2))),
+                MultipleShooting(10, Tsit5();
+                    nlsolve = GaussNewton(; autodiff = AutoForwardDiff(chunksize = 2)),
+                    jac_alg = BVPJacobianAlgorithm(;
+                        bc_diffmode = AutoForwardDiff(; chunksize = 2),
+                        nonbc_diffmode = AutoSparseForwardDiff(; chunksize = 2))),
+            ])
+    end
+
+    @compile_workload begin
+        for prob in probs, alg in algs
+            solve(prob, alg)
+        end
+    end
 end
 
 end
diff --git a/src/BoundaryValueDiffEq.jl b/src/BoundaryValueDiffEq.jl
index 2142a3b5..0113cc87 100644
--- a/src/BoundaryValueDiffEq.jl
+++ b/src/BoundaryValueDiffEq.jl
@@ -89,6 +89,68 @@ end
             solve(prob, alg; dt = 0.2)
         end
     end
+
+    function f1_nlls!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -u[1]
+    end
+
+    f1_nlls(u, p, t) = [u[2], -u[1]]
+
+    function bc1_nlls!(resid, sol, p, t)
+        solₜ₁ = sol[1]
+        solₜ₂ = sol[end]
+        resid[1] = solₜ₁[1]
+        resid[2] = solₜ₂[1] - 1
+        resid[3] = solₜ₂[2] + 1.729109
+        return nothing
+    end
+    bc1_nlls(sol, p, t) = [sol[1][1], sol[end][1] - 1, sol[end][2] + 1.729109]
+
+    bc1_nlls_a!(resid, ua, p) = (resid[1] = ua[1])
+    bc1_nlls_b!(resid, ub, p) = (resid[1] = ub[1] - 1; resid[2] = ub[2] + 1.729109)
+
+    bc1_nlls_a(ua, p) = [ua[1]]
+    bc1_nlls_b(ub, p) = [ub[1] - 1, ub[2] + 1.729109]
+
+    tspan = (0.0, 100.0)
+    u0 = [0.0, 1.0]
+    bcresid_prototype1 = Array{Float64}(undef, 3)
+    bcresid_prototype2 = (Array{Float64}(undef, 1), Array{Float64}(undef, 2))
+
+    probs = [
+        BVProblem(BVPFunction(f1_nlls!, bc1_nlls!; bcresid_prototype = bcresid_prototype1),
+            u0, tspan),
+        BVProblem(BVPFunction(f1_nlls, bc1_nlls; bcresid_prototype = bcresid_prototype1),
+            u0, tspan),
+        TwoPointBVProblem(f1_nlls!, (bc1_nlls_a!, bc1_nlls_b!), u0, tspan;
+            bcresid_prototype = bcresid_prototype2),
+        TwoPointBVProblem(f1_nlls, (bc1_nlls_a, bc1_nlls_b), u0, tspan;
+            bcresid_prototype = bcresid_prototype2),
+    ]
+
+    jac_alg = BVPJacobianAlgorithm(AutoForwardDiff(; chunksize = 2))
+
+    nlsolvers = [LevenbergMarquardt(), GaussNewton()]
+
+    algs = []
+
+    if Preferences.@load_preference("PrecompileMIRKNLLS", VERSION≥v"1.10-")
+        for nlsolve in nlsolvers
+            append!(algs,
+                [
+                    MIRK2(; jac_alg, nlsolve), MIRK3(; jac_alg, nlsolve),
+                    MIRK4(; jac_alg, nlsolve), MIRK5(; jac_alg, nlsolve),
+                    MIRK6(; jac_alg, nlsolve),
+                ])
+        end
+    end
+
+    @compile_workload begin
+        for prob in probs, alg in algs
+            solve(prob, alg; dt = 0.2)
+        end
+    end
 end
 
 export Shooting, MultipleShooting
diff --git a/src/solve/mirk.jl b/src/solve/mirk.jl
index 62ae0c9c..45857e73 100644
--- a/src/solve/mirk.jl
+++ b/src/solve/mirk.jl
@@ -274,11 +274,12 @@ function __mirk_loss_collocation(u, p, y, mesh, residual, cache)
 end
 
 function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::L, ::StandardBVProblem) where {iip, BC, C, L}
+        loss::LF, ::StandardBVProblem) where {iip, BC, C, LF}
     @unpack nlsolve, jac_alg = cache.alg
     N = length(cache.mesh)
 
     resid_bc = cache.bcresid_prototype
+    L = length(resid_bc)
     resid_collocation = similar(y, cache.M * (N - 1))
 
     loss_bcₚ = iip ? ((du, u) -> loss_bc(du, u, cache.p)) : (u -> loss_bc(u, cache.p))
@@ -302,45 +303,48 @@ function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collo
     jac_prototype = vcat(init_jacobian(cache_bc), init_jacobian(cache_collocation))
 
     jac = if iip
-        (J, u, p) -> __mirk_mpoint_jacobian!(J, u, p, jac_alg.bc_diffmode,
+        (J, u, p) -> __mirk_mpoint_jacobian!(J, u, jac_alg.bc_diffmode,
             jac_alg.nonbc_diffmode, cache_bc, cache_collocation, loss_bcₚ,
-            loss_collocationₚ, resid_bc, resid_collocation, cache.M)
+            loss_collocationₚ, resid_bc, resid_collocation, L)
     else
-        (u, p) -> __mirk_mpoint_jacobian(u, p, jac_prototype, jac_alg.bc_diffmode,
+        (u, p) -> __mirk_mpoint_jacobian(jac_prototype, u, jac_alg.bc_diffmode,
             jac_alg.nonbc_diffmode, cache_bc, cache_collocation, loss_bcₚ,
-            loss_collocationₚ, cache.M)
+            loss_collocationₚ, L)
     end
 
-    return NonlinearProblem(NonlinearFunction{iip}(loss; jac, jac_prototype), y, cache.p)
+    nlf = NonlinearFunction{iip}(loss; resid_prototype = vcat(resid_bc, resid_collocation),
+        jac, jac_prototype)
+    return (L == cache.M ? NonlinearProblem : NonlinearLeastSquaresProblem)(nlf, y, cache.p)
 end
 
-function __mirk_mpoint_jacobian!(J, x, p, bc_diffmode, nonbc_diffmode, bc_diffcache,
+function __mirk_mpoint_jacobian!(J, x, bc_diffmode, nonbc_diffmode, bc_diffcache,
         nonbc_diffcache, loss_bc::BC, loss_collocation::C, resid_bc, resid_collocation,
-        M::Int) where {BC, C}
-    sparse_jacobian!(@view(J[1:M, :]), bc_diffmode, bc_diffcache, loss_bc, resid_bc, x)
-    sparse_jacobian!(@view(J[(M + 1):end, :]), nonbc_diffmode, nonbc_diffcache,
+        L::Int) where {BC, C}
+    sparse_jacobian!(@view(J[1:L, :]), bc_diffmode, bc_diffcache, loss_bc, resid_bc, x)
+    sparse_jacobian!(@view(J[(L + 1):end, :]), nonbc_diffmode, nonbc_diffcache,
         loss_collocation, resid_collocation, x)
     return nothing
 end
 
-function __mirk_mpoint_jacobian(x, p, J, bc_diffmode, nonbc_diffmode, bc_diffcache,
-        nonbc_diffcache, loss_bc::BC, loss_collocation::C, M::Int) where {BC, C}
-    sparse_jacobian!(@view(J[1:M, :]), bc_diffmode, bc_diffcache, loss_bc, x)
-    sparse_jacobian!(@view(J[(M + 1):end, :]), nonbc_diffmode, nonbc_diffcache,
+function __mirk_mpoint_jacobian(J, x, bc_diffmode, nonbc_diffmode, bc_diffcache,
+        nonbc_diffcache, loss_bc::BC, loss_collocation::C, L::Int) where {BC, C}
+    sparse_jacobian!(@view(J[1:L, :]), bc_diffmode, bc_diffcache, loss_bc, x)
+    sparse_jacobian!(@view(J[(L + 1):end, :]), nonbc_diffmode, nonbc_diffcache,
         loss_collocation, x)
     return J
 end
 
 function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collocation::C,
-        loss::L, ::TwoPointBVProblem) where {iip, BC, C, L}
+        loss::LF, ::TwoPointBVProblem) where {iip, BC, C, LF}
     @unpack nlsolve, jac_alg = cache.alg
     N = length(cache.mesh)
 
     lossₚ = iip ? ((du, u) -> loss(du, u, cache.p)) : (u -> loss(u, cache.p))
 
-    resid = vcat(cache.bcresid_prototype[1:prod(cache.resid_size[1])],
+    resid = vcat(@view(cache.bcresid_prototype[1:prod(cache.resid_size[1])]),
         similar(y, cache.M * (N - 1)),
-        cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end])
+        @view(cache.bcresid_prototype[(prod(cache.resid_size[1]) + 1):end]))
+    L = length(cache.bcresid_prototype)
 
     sd = if jac_alg.diffmode isa AbstractSparseADType
         __sparsity_detection_alg(__generate_sparse_jacobian_prototype(cache,
@@ -354,22 +358,24 @@ function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc::BC, loss_collo
     jac_prototype = init_jacobian(diffcache)
 
     jac = if iip
-        (J, u, p) -> __mirk_2point_jacobian!(J, u, p, jac_alg.diffmode, diffcache, lossₚ,
+        (J, u, p) -> __mirk_2point_jacobian!(J, u, jac_alg.diffmode, diffcache, lossₚ,
             resid)
     else
-        (u, p) -> __mirk_2point_jacobian(u, p, jac_prototype, jac_alg.diffmode, diffcache,
+        (u, p) -> __mirk_2point_jacobian(u, jac_prototype, jac_alg.diffmode, diffcache,
             lossₚ)
     end
 
-    return NonlinearProblem(NonlinearFunction{iip}(loss; jac, jac_prototype), y, cache.p)
+    nlf = NonlinearFunction{iip}(loss; resid_prototype = copy(resid), jac, jac_prototype)
+
+    return (L == cache.M ? NonlinearProblem : NonlinearLeastSquaresProblem)(nlf, y, cache.p)
 end
 
-function __mirk_2point_jacobian!(J, x, p, diffmode, diffcache, loss_fn::L, resid) where {L}
+function __mirk_2point_jacobian!(J, x, diffmode, diffcache, loss_fn::L, resid) where {L}
     sparse_jacobian!(J, diffmode, diffcache, loss_fn, resid, x)
     return J
 end
 
-function __mirk_2point_jacobian(x, p, J, diffmode, diffcache, loss_fn::L) where {L}
+function __mirk_2point_jacobian(x, J, diffmode, diffcache, loss_fn::L) where {L}
     sparse_jacobian!(J, diffmode, diffcache, loss_fn, x)
     return J
 end
diff --git a/src/solve/multiple_shooting.jl b/src/solve/multiple_shooting.jl
index 3eb7c9d9..c1ace55a 100644
--- a/src/solve/multiple_shooting.jl
+++ b/src/solve/multiple_shooting.jl
@@ -21,6 +21,9 @@ function __solve(prob::BVProblem, _alg::MultipleShooting; odesolve_kwargs = (;),
     if prob.problem_type isa TwoPointBVProblem
         resida_len = prod(resid_size[1])
         residb_len = prod(resid_size[2])
+        M = resida_len + residb_len
+    else
+        M = length(bcresid_prototype)
     end
 
     internal_ode_kwargs = (; verbose, kwargs..., odesolve_kwargs..., save_end = true)
@@ -46,11 +49,11 @@ function __solve(prob::BVProblem, _alg::MultipleShooting; odesolve_kwargs = (;),
         if prob.problem_type isa TwoPointBVProblem
             __solve_nlproblem!(prob.problem_type, alg, bcresid_prototype, u_at_nodes, nodes,
                 cur_nshoot, N, resida_len, residb_len, solve_internal_odes!, bc[1], bc[2],
-                prob, u0; verbose, kwargs..., nlsolve_kwargs...)
+                prob, u0, M; verbose, kwargs..., nlsolve_kwargs...)
         else
             __solve_nlproblem!(prob.problem_type, alg, bcresid_prototype, u_at_nodes, nodes,
                 cur_nshoot, N, prod(resid_size), solve_internal_odes!, bc, prob, f,
-                u0_size, u0; verbose, kwargs..., nlsolve_kwargs...)
+                u0_size, u0, M; verbose, kwargs..., nlsolve_kwargs...)
         end
     end
 
@@ -61,7 +64,7 @@ end
 
 function __solve_nlproblem!(::TwoPointBVProblem, alg::MultipleShooting, bcresid_prototype,
         u_at_nodes, nodes, cur_nshoot::Int, N::Int, resida_len::Int, residb_len::Int,
-        solve_internal_odes!::S, bca::B1, bcb::B2, prob, u0; kwargs...) where {B1, B2, S}
+        solve_internal_odes!::S, bca::B1, bcb::B2, prob, u0, M; kwargs...) where {B1, B2, S}
     if __any_sparse_ad(alg.jac_alg)
         J_proto = __generate_sparse_jacobian_prototype(alg, prob.problem_type,
             bcresid_prototype, u0, N, cur_nshoot)
@@ -89,7 +92,8 @@ function __solve_nlproblem!(::TwoPointBVProblem, alg::MultipleShooting, bcresid_
         jac_prototype)
 
     # NOTE: u_at_nodes is updated inplace
-    nlprob = NonlinearProblem(loss_function!, u_at_nodes, prob.p)
+    nlprob = (M != N ? NonlinearLeastSquaresProblem : NonlinearProblem)(loss_function!,
+        u_at_nodes, prob.p)
     __solve(nlprob, alg.nlsolve; kwargs..., alias_u0 = true)
 
     return nothing
@@ -97,7 +101,7 @@ end
 
 function __solve_nlproblem!(::StandardBVProblem, alg::MultipleShooting, bcresid_prototype,
         u_at_nodes, nodes, cur_nshoot, N, resid_len::Int, solve_internal_odes!::S, bc::BC,
-        prob, f::F, u0_size, u0; kwargs...) where {BC, F, S}
+        prob, f::F, u0_size, u0, M; kwargs...) where {BC, F, S}
     if __any_sparse_ad(alg.jac_alg)
         J_proto = __generate_sparse_jacobian_prototype(alg, prob.problem_type,
             bcresid_prototype, u0, N, cur_nshoot)
@@ -129,13 +133,14 @@ function __solve_nlproblem!(::StandardBVProblem, alg::MultipleShooting, bcresid_
 
     jac_fn = (J, u, p) -> __multiple_shooting_mpoint_jacobian!(J, u, p,
         similar(bcresid_prototype), resid_nodes, ode_jac_cache, bc_jac_cache,
-        ode_fn, bc_fn, alg, N)
+        ode_fn, bc_fn, alg, N, M)
 
     loss_function! = NonlinearFunction{true}(loss_fn; resid_prototype, jac = jac_fn,
         jac_prototype)
 
     # NOTE: u_at_nodes is updated inplace
-    nlprob = NonlinearProblem(loss_function!, u_at_nodes, prob.p)
+    nlprob = (M != N ? NonlinearLeastSquaresProblem : NonlinearProblem)(loss_function!,
+        u_at_nodes, prob.p)
     __solve(nlprob, alg.nlsolve; kwargs..., alias_u0 = true)
 
     return nothing
@@ -181,9 +186,9 @@ end
 
 function __multiple_shooting_mpoint_jacobian!(J, us, p, resid_bc, resid_nodes,
         ode_jac_cache, bc_jac_cache, ode_fn::F1, bc_fn::F2, alg::MultipleShooting,
-        N::Int) where {F1, F2}
-    J_bc = @view(J[1:N, :])
-    J_c = @view(J[(N + 1):end, :])
+        N::Int, M::Int) where {F1, F2}
+    J_bc = @view(J[1:M, :])
+    J_c = @view(J[(M + 1):end, :])
 
     sparse_jacobian!(J_c, alg.jac_alg.nonbc_diffmode, ode_jac_cache, ode_fn,
         resid_nodes.du, us)
diff --git a/src/solve/single_shooting.jl b/src/solve/single_shooting.jl
index 8d62aa8a..4fcff462 100644
--- a/src/solve/single_shooting.jl
+++ b/src/solve/single_shooting.jl
@@ -1,6 +1,6 @@
 function __solve(prob::BVProblem, alg::Shooting; odesolve_kwargs = (;),
         nlsolve_kwargs = (;), verbose = true, kwargs...)
-    ig, T, _, _, u0 = __extract_problem_details(prob; dt = 0.1)
+    ig, T, N, _, u0 = __extract_problem_details(prob; dt = 0.1)
     _unwrap_val(ig) && verbose &&
         @warn "Initial guess provided, but will be ignored for Shooting!"
 
@@ -17,9 +17,14 @@ function __solve(prob::BVProblem, alg::Shooting; odesolve_kwargs = (;),
             prob.problem_type, alg, ode_kwargs)
     end
 
-    opt = __solve(NonlinearProblem(NonlinearFunction{iip}(loss_fn; prob.f.jac_prototype,
-                resid_prototype), vec(u0), prob.p), alg.nlsolve;
-        nlsolve_kwargs..., verbose, kwargs...)
+    nlf = NonlinearFunction{iip}(loss_fn; prob.f.jac_prototype, resid_prototype)
+    nlprob = if length(resid_prototype) == length(u0)
+        NonlinearProblem(nlf, vec(u0), prob.p)
+    else
+        NonlinearLeastSquaresProblem(nlf, vec(u0), prob.p)
+    end
+    opt = __solve(nlprob, alg.nlsolve; nlsolve_kwargs..., verbose, kwargs...)
+
     newprob = ODEProblem{iip}(prob.f, reshape(opt.u, u0_size), prob.tspan, prob.p)
     sol = __solve(newprob, alg.ode_alg; odesolve_kwargs..., verbose, kwargs...)
 
diff --git a/src/sparse_jacobians.jl b/src/sparse_jacobians.jl
index bd005001..39f8a95a 100644
--- a/src/sparse_jacobians.jl
+++ b/src/sparse_jacobians.jl
@@ -68,6 +68,8 @@ function __generate_sparse_jacobian_prototype(::MIRKCache, ::TwoPointBVProblem,
     J₁ = length(ya) + length(yb) + M * (N - 1)
     J₂ = M * N
     J = BandedMatrix(Ones{eltype(ya)}(J₁, J₂), (M + 1, M + 1))
+    # for underdetermined systems we don't have banded qr implemented. use sparse
+    J₁ < J₂ && return ColoredMatrix(sparse(J), matrix_colors(J'), matrix_colors(J))
     return ColoredMatrix(J, matrix_colors(J'), matrix_colors(J))
 end
 
@@ -111,5 +113,7 @@ function __generate_sparse_jacobian_prototype(::MultipleShooting, ::TwoPointBVPr
     #        We should be able to use that particular structure.
     J = BandedMatrix(Ones{eltype(u0)}(J₁, J₂), (max(L₁, L₂) + N - 1, N + 1))
 
+    # for underdetermined systems we don't have banded qr implemented. use sparse
+    J₁ < J₂ && return ColoredMatrix(sparse(J), matrix_colors(J'), matrix_colors(J))
     return ColoredMatrix(J, matrix_colors(J'), matrix_colors(J))
 end
diff --git a/src/utils.jl b/src/utils.jl
index 644f0dd6..91033da4 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -104,11 +104,13 @@ end
 
 __append_similar!(::Nothing, n, _) = nothing
 
+# NOTE: We use `last` since the `first` might not conform to the same structure. For eg,
+#       in the case of residuals
 function __append_similar!(x::AbstractVector{<:AbstractArray}, n, _)
     N = n - length(x)
     N == 0 && return x
     N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
-    append!(x, [similar(first(x)) for _ in 1:N])
+    append!(x, [similar(last(x)) for _ in 1:N])
     return x
 end
 
@@ -117,7 +119,7 @@ function __append_similar!(x::AbstractVector{<:MaybeDiffCache}, n, M)
     N == 0 && return x
     N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
     chunksize = pickchunksize(M * (N + length(x)))
-    append!(x, [__maybe_allocate_diffcache(first(x), chunksize) for _ in 1:N])
+    append!(x, [__maybe_allocate_diffcache(last(x), chunksize) for _ in 1:N])
     return x
 end
 
diff --git a/test/mirk/ensemble.jl b/test/mirk/ensemble.jl
index 252c5eda..5d9151b0 100644
--- a/test/mirk/ensemble.jl
+++ b/test/mirk/ensemble.jl
@@ -23,7 +23,6 @@ ensemble_prob = EnsembleProblem(bvp; prob_func)
         BVPJacobianAlgorithm(; bc_diffmode = AutoFiniteDiff(),
             nonbc_diffmode = AutoSparseFiniteDiff())]
     for jac_alg in jac_algs
-        # Not sure why it is throwing so many warnings
         sol = solve(ensemble_prob, solver(; jac_alg); trajectories = 10, dt = 0.1)
         @test sol.converged
     end
diff --git a/test/mirk/nonlinear_least_squares.jl b/test/mirk/nonlinear_least_squares.jl
new file mode 100644
index 00000000..4323c7b3
--- /dev/null
+++ b/test/mirk/nonlinear_least_squares.jl
@@ -0,0 +1,80 @@
+using BoundaryValueDiffEq, LinearAlgebra, Test
+
+@testset "Overconstrained BVP" begin
+    SOLVERS = [mirk(; nlsolve) for mirk in (MIRK4, MIRK5, MIRK6),
+    nlsolve in (LevenbergMarquardt(), GaussNewton())]
+
+    # OOP MP-BVP
+    f1(u, p, t) = [u[2], -u[1]]
+
+    function bc1(sol, p, t)
+        solₜ₁ = sol[1]
+        solₜ₂ = sol[end]
+        return [solₜ₁[1], solₜ₂[1] - 1, solₜ₂[2] + 1.729109]
+    end
+
+    tspan = (0.0, 100.0)
+    u0 = [0.0, 1.0]
+
+    bvp1 = BVProblem(BVPFunction{false}(f1, bc1; bcresid_prototype = zeros(3)), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp1, solver; verbose = false, dt = 1.0)
+        @test norm(bc1(sol, nothing, tspan)) < 1e-2
+    end
+
+    # IIP MP-BVP
+    function f1!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -u[1]
+        return nothing
+    end
+
+    function bc1!(resid, sol, p, t)
+        solₜ₁ = sol[1]
+        solₜ₂ = sol[end]
+        # We know that this overconstrained system has a solution
+        resid[1] = solₜ₁[1]
+        resid[2] = solₜ₂[1] - 1
+        resid[3] = solₜ₂[2] + 1.729109
+        return nothing
+    end
+
+    bvp2 = BVProblem(BVPFunction{true}(f1!, bc1!; bcresid_prototype = zeros(3)), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp2, solver; verbose = false, dt = 1.0)
+        resid_f = Array{Float64}(undef, 3)
+        bc1!(resid_f, sol, nothing, sol.t)
+        @test norm(resid_f) < 1e-2
+    end
+
+    # OOP TP-BVP
+    bc1a(ua, p) = [ua[1]]
+    bc1b(ub, p) = [ub[1] - 1, ub[2] + 1.729109]
+
+    bvp3 = TwoPointBVProblem(BVPFunction{false}(f1, (bc1a, bc1b); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp3, solver; verbose = false, dt = 1.0)
+        @test norm(vcat(bc1a(sol[1], nothing), bc1b(sol[end], nothing))) < 1e-2
+    end
+
+    # IIP TP-BVP
+    bc1a!(resid, ua, p) = (resid[1] = ua[1])
+    bc1b!(resid, ub, p) = (resid[1] = ub[1] - 1; resid[2] = ub[2] + 1.729109)
+
+    bvp4 = TwoPointBVProblem(BVPFunction{true}(f1!, (bc1a!, bc1b!); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp3, solver; verbose = false, dt = 1.0, abstol = 1e-3,
+            reltol = 1e-3, nlsolve_kwargs = (; maxiters = 50, abstol = 1e-3, reltol = 1e-3))
+        resida = Array{Float64}(undef, 1)
+        residb = Array{Float64}(undef, 2)
+        bc1a!(resida, sol(0.0), nothing)
+        bc1b!(residb, sol(100.0), nothing)
+        @test norm(vcat(resida, residb)) < 1e-2
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7c1db94e..8b07648f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -14,6 +14,11 @@ const GROUP = uppercase(get(ENV, "GROUP", "ALL"))
             @time @safetestset "Orbital" begin
                 include("shooting/orbital.jl")
             end
+            if VERSION ≥ v"1.10-"
+                @time @safetestset "Shooting NLLS Tests" begin
+                    include("shooting/nonlinear_least_squares.jl")
+                end
+            end
         end
     end
 
@@ -31,6 +36,11 @@ const GROUP = uppercase(get(ENV, "GROUP", "ALL"))
             @time @safetestset "Interpolation Tests" begin
                 include("mirk/interpolation_test.jl")
             end
+            if VERSION ≥ v"1.10-"
+                @time @safetestset "MIRK NLLS Tests" begin
+                    include("mirk/nonlinear_least_squares.jl")
+                end
+            end
         end
     end
 
diff --git a/test/shooting/nonlinear_least_squares.jl b/test/shooting/nonlinear_least_squares.jl
new file mode 100644
index 00000000..7108f7e5
--- /dev/null
+++ b/test/shooting/nonlinear_least_squares.jl
@@ -0,0 +1,96 @@
+using BoundaryValueDiffEq, LinearAlgebra, LinearSolve, OrdinaryDiffEq, Test
+
+@testset "Overconstrained BVP" begin
+    SOLVERS = [
+        Shooting(Tsit5(); nlsolve = LevenbergMarquardt()),
+        Shooting(Tsit5(); nlsolve = GaussNewton()),
+        MultipleShooting(10, Tsit5(); nlsolve = LevenbergMarquardt()),
+        MultipleShooting(10, Tsit5(); nlsolve = GaussNewton())]
+
+    # OOP MP-BVP
+    f1(u, p, t) = [u[2], -u[1]]
+
+    function bc1(sol, p, t)
+        t₁, t₂ = extrema(t)
+        solₜ₁ = sol(t₁)
+        solₜ₂ = sol(t₂)
+        solₜ₃ = sol((t₁ + t₂) / 2)
+        # We know that this overconstrained system has a solution
+        return [solₜ₁[1], solₜ₂[1] - 1, solₜ₃[1] - 0.51735, solₜ₃[2] + 1.92533]
+    end
+
+    tspan = (0.0, 100.0)
+    u0 = [0.0, 1.0]
+
+    bvp1 = BVProblem(BVPFunction{false}(f1, bc1; bcresid_prototype = zeros(4)), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp1, solver;
+            nlsolve_kwargs = (; abstol = 1e-8, reltol = 1e-8, maxiters = 1000),
+            verbose = false)
+        @test norm(bc1(sol, nothing, sol.t)) < 1e-4
+    end
+
+    # IIP MP-BVP
+    function f1!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -u[1]
+        return nothing
+    end
+
+    function bc1!(resid, sol, p, t)
+        (t₁, t₂) = extrema(t)
+        solₜ₁ = sol(t₁)
+        solₜ₂ = sol(t₂)
+        solₜ₃ = sol((t₁ + t₂) / 2)
+        # We know that this overconstrained system has a solution
+        resid[1] = solₜ₁[1]
+        resid[2] = solₜ₂[1] - 1
+        resid[3] = solₜ₃[1] - 0.51735
+        resid[4] = solₜ₃[2] + 1.92533
+        return nothing
+    end
+
+    bvp2 = BVProblem(BVPFunction{true}(f1!, bc1!; bcresid_prototype = zeros(4)), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp2, solver;
+            nlsolve_kwargs = (; abstol = 1e-8, reltol = 1e-8, maxiters = 1000),
+            verbose = false)
+        resid_f = Array{Float64}(undef, 4)
+        bc1!(resid_f, sol, nothing, sol.t)
+        @test norm(resid_f) < 1e-4
+    end
+
+    # OOP TP-BVP
+    bc1a(ua, p) = [ua[1]]
+    bc1b(ub, p) = [ub[1] - 1, ub[2] + 1.729109]
+
+    bvp3 = TwoPointBVProblem(BVPFunction{false}(f1, (bc1a, bc1b); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp3, solver;
+            nlsolve_kwargs = (; abstol = 1e-8, reltol = 1e-8, maxiters = 1000),
+            verbose = false)
+        @test norm(vcat(bc1a(sol(0.0), nothing), bc1b(sol(100.0), nothing))) < 1e-4
+    end
+
+    # IIP TP-BVP
+    bc1a!(resid, ua, p) = (resid[1] = ua[1])
+    bc1b!(resid, ub, p) = (resid[1] = ub[1] - 1; resid[2] = ub[2] + 1.729109)
+
+    bvp4 = TwoPointBVProblem(BVPFunction{true}(f1!, (bc1a!, bc1b!); twopoint = Val(true),
+            bcresid_prototype = (zeros(1), zeros(2))), u0, tspan)
+
+    for solver in SOLVERS
+        @time sol = solve(bvp4, solver;
+            nlsolve_kwargs = (; abstol = 1e-8, reltol = 1e-8, maxiters = 1000),
+            verbose = false)
+        resida = Array{Float64}(undef, 1)
+        residb = Array{Float64}(undef, 2)
+        bc1a!(resida, sol(0.0), nothing)
+        bc1b!(residb, sol(100.0), nothing)
+        @test norm(vcat(resida, residb)) < 1e-4
+    end
+end
diff --git a/test/shooting/shooting_tests.jl b/test/shooting/shooting_tests.jl
index 008ff4da..53012d06 100644
--- a/test/shooting/shooting_tests.jl
+++ b/test/shooting/shooting_tests.jl
@@ -1,4 +1,4 @@
-using BoundaryValueDiffEq, LinearAlgebra, OrdinaryDiffEq, Test
+using BoundaryValueDiffEq, LinearAlgebra, LinearSolve, OrdinaryDiffEq, Test
 
 @testset "Basic Shooting Tests" begin
     SOLVERS = [Shooting(Tsit5()), MultipleShooting(10, Tsit5())]