diff --git a/docs/src/api/nonlinearsolve.md b/docs/src/api/nonlinearsolve.md
index 5da204f07..3e6009017 100644
--- a/docs/src/api/nonlinearsolve.md
+++ b/docs/src/api/nonlinearsolve.md
@@ -16,7 +16,9 @@ GeneralKlement
 ## Polyalgorithms
 
 ```@docs
+NonlinearSolvePolyAlgorithm
 FastShortcutNonlinearPolyalg
+FastShortcutNLLSPolyalg
 RobustMultiNewton
 ```
 
diff --git a/docs/src/solvers/NonlinearLeastSquaresSolvers.md b/docs/src/solvers/NonlinearLeastSquaresSolvers.md
index e414acdd8..1690671d9 100644
--- a/docs/src/solvers/NonlinearLeastSquaresSolvers.md
+++ b/docs/src/solvers/NonlinearLeastSquaresSolvers.md
@@ -7,7 +7,10 @@ Solves the nonlinear least squares problem defined by `prob` using the algorithm
 
 ## Recommended Methods
 
-`LevenbergMarquardt` is a good choice for most problems.
+The default method `FastShortcutNLLSPolyalg` is a good choice for most
+problems. It is a polyalgorithm that attempts to use a fast algorithm
+(`GaussNewton`) and if that fails it falls back to a more robust
+algorithm (`LevenbergMarquardt`).
 
 ## Full List of Methods
 
@@ -21,10 +24,3 @@ Solves the nonlinear least squares problem defined by `prob` using the algorithm
     problems.
   - `SimpleNewtonRaphson()`: Simple Gauss Newton Implementation with `QRFactorization` to
     solve a linear least squares problem at each step!
-
-## Example usage
-
-```julia
-using NonlinearSolve
-sol = solve(prob, LevenbergMarquardt())
-```
diff --git a/ext/NonlinearSolveFastLevenbergMarquardtExt.jl b/ext/NonlinearSolveFastLevenbergMarquardtExt.jl
index 08d719f96..e292e29c6 100644
--- a/ext/NonlinearSolveFastLevenbergMarquardtExt.jl
+++ b/ext/NonlinearSolveFastLevenbergMarquardtExt.jl
@@ -32,9 +32,10 @@ end
 (f::InplaceFunction{false})(fx, x, p) = (fx .= f.f(x, p))
 
 function SciMLBase.__init(prob::NonlinearLeastSquaresProblem,
-        alg::FastLevenbergMarquardtJL, args...; abstol = 1e-8, reltol = 1e-8,
-        verbose = false, maxiters = 1000, kwargs...)
+        alg::FastLevenbergMarquardtJL, args...; alias_u0 = false, abstol = 1e-8,
+        reltol = 1e-8, verbose = false, maxiters = 1000, kwargs...)
     iip = SciMLBase.isinplace(prob)
+    u0 = alias_u0 ? prob.u0 : deepcopy(prob.u0)
 
     @assert prob.f.jac!==nothing "FastLevenbergMarquardt requires a Jacobian!"
 
@@ -42,13 +43,13 @@ function SciMLBase.__init(prob::NonlinearLeastSquaresProblem,
     J! = InplaceFunction{iip}(prob.f.jac)
 
     resid_prototype = prob.f.resid_prototype === nothing ?
-                      (!iip ? prob.f(prob.u0, prob.p) : zeros(prob.u0)) :
+                      (!iip ? prob.f(u0, prob.p) : zeros(u0)) :
                       prob.f.resid_prototype
 
-    J = similar(prob.u0, length(resid_prototype), length(prob.u0))
+    J = similar(u0, length(resid_prototype), length(u0))
 
-    solver = _fast_lm_solver(alg, prob.u0)
-    LM = FastLM.LMWorkspace(prob.u0, resid_prototype, J)
+    solver = _fast_lm_solver(alg, u0)
+    LM = FastLM.LMWorkspace(u0, resid_prototype, J)
 
     return FastLevenbergMarquardtJLCache(f!, J!, prob, alg, LM, solver,
         (; xtol = abstol, ftol = reltol, maxit = maxiters, alg.factor, alg.factoraccept,
diff --git a/ext/NonlinearSolveLeastSquaresOptimExt.jl b/ext/NonlinearSolveLeastSquaresOptimExt.jl
index 5cf6ab67d..63ce7d0cc 100644
--- a/ext/NonlinearSolveLeastSquaresOptimExt.jl
+++ b/ext/NonlinearSolveLeastSquaresOptimExt.jl
@@ -33,17 +33,19 @@ end
 (f::FunctionWrapper{false})(du, u) = (du .= f.f(u, f.p))
 
 function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, alg::LeastSquaresOptimJL,
-        args...; abstol = 1e-8, reltol = 1e-8, verbose = false, maxiters = 1000, kwargs...)
+        args...; alias_u0 = false, abstol = 1e-8, reltol = 1e-8, verbose = false,
+        maxiters = 1000, kwargs...)
     iip = SciMLBase.isinplace(prob)
+    u = alias_u0 ? prob.u0 : deepcopy(prob.u0)
 
     f! = FunctionWrapper{iip}(prob.f, prob.p)
     g! = prob.f.jac === nothing ? nothing : FunctionWrapper{iip}(prob.f.jac, prob.p)
 
     resid_prototype = prob.f.resid_prototype === nothing ?
-                      (!iip ? prob.f(prob.u0, prob.p) : zeros(prob.u0)) :
+                      (!iip ? prob.f(u, prob.p) : zeros(u)) :
                       prob.f.resid_prototype
 
-    lsoprob = LSO.LeastSquaresProblem(; x = prob.u0, f!, y = resid_prototype, g!,
+    lsoprob = LSO.LeastSquaresProblem(; x = u, f!, y = resid_prototype, g!,
         J = prob.f.jac_prototype, alg.autodiff, output_length = length(resid_prototype))
     allocated_prob = LSO.LeastSquaresProblemAllocated(lsoprob, _lso_solver(alg))
 
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 39210cd3a..5c97b8340 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -147,7 +147,8 @@ export RadiusUpdateSchemes
 export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, PseudoTransient,
     GeneralBroyden, GeneralKlement, LimitedMemoryBroyden
 export LeastSquaresOptimJL, FastLevenbergMarquardtJL
-export NonlinearSolvePolyAlgorithm, RobustMultiNewton, FastShortcutNonlinearPolyalg
+export NonlinearSolvePolyAlgorithm,
+    RobustMultiNewton, FastShortcutNonlinearPolyalg, FastShortcutNLLSPolyalg
 
 export LineSearch, LiFukushimaLineSearch
 
diff --git a/src/default.jl b/src/default.jl
index 758e049f3..e1138e8fb 100644
--- a/src/default.jl
+++ b/src/default.jl
@@ -250,14 +250,57 @@ function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothi
     return NonlinearSolvePolyAlgorithm(algs, Val(:NLS))
 end
 
+"""
+    FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing,
+                              precs = DEFAULT_PRECS, adkwargs...)
+
+A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods
+for more performance and then tries more robust techniques if the faster ones fail.
+
+### Keyword Arguments
+
+  - `autodiff`: determines the backend used for the Jacobian. Note that this argument is
+    ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to
+    `AutoForwardDiff()`. Valid choices are types from ADTypes.jl.
+  - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used,
+    then the Jacobian will not be constructed and instead direct Jacobian-vector products
+    `J*v` are computed using forward-mode automatic differentiation or finite differencing
+    tricks (without ever constructing the Jacobian). However, if the Jacobian is still needed,
+    for example for a preconditioner, `concrete_jac = true` can be passed in order to force
+    the construction of the Jacobian.
+  - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
+    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
+    LinearSolve.jl default algorithm choice. For more information on available algorithm
+    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+  - `precs`: the choice of preconditioners for the linear solver. Defaults to using no
+    preconditioners. For more information on specifying preconditioners for LinearSolve
+    algorithms, consult the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+"""
+function FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing,
+        precs = DEFAULT_PRECS, adkwargs...)
+    algs = (GaussNewton(; concrete_jac, linsolve, precs, adkwargs...),
+        GaussNewton(; concrete_jac, linsolve, precs, linesearch = BackTracking(),
+            adkwargs...),
+        LevenbergMarquardt(; concrete_jac, linsolve, precs, adkwargs...))
+    return NonlinearSolvePolyAlgorithm(algs, Val(:NLLS))
+end
+
 ## Defaults
 
-function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::Nothing, args...;
-        kwargs...) where {uType, iip}
-    SciMLBase.__init(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...)
+function SciMLBase.__init(prob::NonlinearProblem, ::Nothing, args...; kwargs...)
+    return SciMLBase.__init(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...)
+end
+
+function SciMLBase.__solve(prob::NonlinearProblem, ::Nothing, args...; kwargs...)
+    return SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...)
+end
+
+function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, ::Nothing, args...; kwargs...)
+    return SciMLBase.__init(prob, FastShortcutNLLSPolyalg(), args...; kwargs...)
 end
 
-function SciMLBase.__solve(prob::NonlinearProblem{uType, iip}, alg::Nothing, args...;
-        kwargs...) where {uType, iip}
-    SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(), args...; kwargs...)
+function SciMLBase.__solve(prob::NonlinearLeastSquaresProblem, ::Nothing, args...;
+        kwargs...)
+    return SciMLBase.__solve(prob, FastShortcutNLLSPolyalg(), args...; kwargs...)
 end
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index b77d89087..72843c42e 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -85,7 +85,7 @@ end
 # Broyden and Klement Tests are quite flaky and failure seems to be platform dependent
 # needs additional investigation before we can enable them
 @testset "GeneralBroyden 23 Test Problems" begin
-    alg_ops = (GeneralBroyden(),)
+    alg_ops = (GeneralBroyden(; max_resets = 10),)
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
     broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 11, 12, 13, 14]
diff --git a/test/nonlinear_least_squares.jl b/test/nonlinear_least_squares.jl
index 07a310196..7b8354a9d 100644
--- a/test/nonlinear_least_squares.jl
+++ b/test/nonlinear_least_squares.jl
@@ -12,12 +12,12 @@ y_target = true_function(x, θ_true)
 
 function loss_function(θ, p)
     ŷ = true_function(p, θ)
-    return abs2.(ŷ .- y_target)
+    return ŷ .- y_target
 end
 
 function loss_function(resid, θ, p)
     true_function(resid, p, θ)
-    resid .= abs2.(resid .- y_target)
+    resid .= resid .- y_target
     return resid
 end
 
@@ -36,6 +36,7 @@ append!(solvers,
         LevenbergMarquardt(; linsolve = LUFactorization()),
         LeastSquaresOptimJL(:lm),
         LeastSquaresOptimJL(:dogleg),
+        nothing,
     ])
 
 for prob in nlls_problems, solver in solvers
@@ -45,7 +46,8 @@ for prob in nlls_problems, solver in solvers
 end
 
 function jac!(J, θ, p)
-    ForwardDiff.jacobian!(J, resid -> loss_function(resid, θ, p), θ)
+    resid = zeros(length(p))
+    ForwardDiff.jacobian!(J, (resid, θ) -> loss_function(resid, θ, p), resid, θ)
     return J
 end
 
@@ -56,6 +58,5 @@ solvers = [FastLevenbergMarquardtJL(:cholesky), FastLevenbergMarquardtJL(:qr)]
 
 for solver in solvers
     @time sol = solve(prob, solver; maxiters = 10000, abstol = 1e-8)
-    @test SciMLBase.successful_retcode(sol)
     @test norm(sol.resid) < 1e-6
 end