diff --git a/docs/src/user/minimization.md b/docs/src/user/minimization.md
index 74d8efbe0..d3590235c 100644
--- a/docs/src/user/minimization.md
+++ b/docs/src/user/minimization.md
@@ -219,3 +219,22 @@
 line search errors if `initial_x` is a stationary point. Notice, that this is on a first order check.
 If `initial_x` is any type of stationary point, `g_converged` will be true. This includes local minima, saddle points, and local maxima.
 If `iterations` is `0` and `g_converged` is `true`, the user needs to keep this point in mind.
+
+## Iterator interface
+For multivariate optimization, an iterator interface is provided through the
+`Optim.optimizing` function. With this interface, `optimize(args...; kwargs...)` is equivalent to
+
+```jl
+let istate
+    for istate′ in Optim.optimizing(args...; kwargs...)
+        istate = istate′
+    end
+    Optim.OptimizationResults(istate)
+end
+```
+
+The iterator returned by `Optim.optimizing` yields an iterator state for each
+iteration step.
+
+The functions that can be called on a result object (e.g. `minimizer`, `iterations`; see
+[Complete list of functions](@ref)) can also be called on the iteration state `istate`.
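A concrete usage sketch of the interface documented above (illustrative only: it assumes the Rosenbrock objective used throughout the Optim.jl docs, and relies solely on the `optimizing`, accessor, and `OptimizationResults(istate)` entry points introduced in this patch):

```jl
using Optim

# Rosenbrock function, the standard example objective in the Optim.jl docs.
rosenbrock(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2

res = let istate
    # Drive the optimization one step at a time instead of calling `optimize`.
    for istate′ in Optim.optimizing(rosenbrock, zeros(2), BFGS(),
                                    Optim.Options(iterations = 100))
        istate = istate′
        # Every intermediate state supports the usual accessor functions.
        println(Optim.iterations(istate), ": ", Optim.minimum(istate))
    end
    # Turn the final iterator state into an ordinary results object.
    Optim.OptimizationResults(istate)
end

Optim.minimizer(res)
```

Because the loop owns the iteration, intermediate quantities can be inspected or logged without waiting for `optimize` to return.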
diff --git a/src/api.jl b/src/api.jl
index aa7e1992d..0b6e91b52 100644
--- a/src/api.jl
+++ b/src/api.jl
@@ -1,10 +1,13 @@
-Base.summary(r::OptimizationResults) = summary(r.method) # might want to do more here than just return summary of the method used
+Base.summary(r::Union{OptimizationResults, IteratorState}) =
+    summary(AbstractOptimizer(r)) # might want to do more here than just return summary of the method used
 minimizer(r::OptimizationResults) = r.minimizer
 minimum(r::OptimizationResults) = r.minimum
 iterations(r::OptimizationResults) = r.iterations
 iteration_limit_reached(r::OptimizationResults) = r.iteration_converged
 trace(r::OptimizationResults) = length(r.trace) > 0 ? r.trace : error("No trace in optimization results. To get a trace, run optimize() with store_trace = true.")
 
+AbstractOptimizer(r::OptimizationResults) = r.method
+
 function x_trace(r::UnivariateOptimizationResults)
     tr = trace(r)
     !haskey(tr[1].metadata, "minimizer") && error("Trace does not contain x. To get a trace of x, run optimize() with extended_trace = true")
@@ -23,41 +26,45 @@ function x_upper_trace(r::UnivariateOptimizationResults)
 end
 x_upper_trace(r::MultivariateOptimizationResults) = error("x_upper_trace is not implemented for $(summary(r)).")
 
-function x_trace(r::MultivariateOptimizationResults)
+function x_trace(r::Union{MultivariateOptimizationResults, IteratorState})
     tr = trace(r)
-    if isa(r.method, NelderMead)
+    if isa(AbstractOptimizer(r), NelderMead)
         throw(ArgumentError("Nelder Mead does not operate with a single x. Please use either centroid_trace(...) or simplex_trace(...) to extract the relevant points from the trace."))
     end
     !haskey(tr[1].metadata, "x") && error("Trace does not contain x. To get a trace of x, run optimize() with extended_trace = true")
     [ state.metadata["x"] for state in tr ]
 end
 
-function centroid_trace(r::MultivariateOptimizationResults)
-    if !isa(r.method, NelderMead)
-        throw(ArgumentError("There is no centroid involved in optimization using $(r.method). Please use x_trace(...) to grab the points from the trace."))
+function centroid_trace(r::Union{MultivariateOptimizationResults, IteratorState})
+    tr = trace(r)
+    if !isa(AbstractOptimizer(r), NelderMead)
+        throw(ArgumentError("There is no centroid involved in optimization using $(AbstractOptimizer(r)). Please use x_trace(...) to grab the points from the trace."))
     end
     !haskey(tr[1].metadata, "centroid") && error("Trace does not contain centroid. To get a trace of the centroid, run optimize() with extended_trace = true")
     [ state.metadata["centroid"] for state in tr ]
 end
 
-function simplex_trace(r::MultivariateOptimizationResults)
-    if !isa(r.method, NelderMead)
-        throw(ArgumentError("There is no simplex involved in optimization using $(r.method). Please use x_trace(...) to grab the points from the trace."))
+function simplex_trace(r::Union{MultivariateOptimizationResults, IteratorState})
+    tr = trace(r)
+    if !isa(AbstractOptimizer(r), NelderMead)
+        throw(ArgumentError("There is no simplex involved in optimization using $(AbstractOptimizer(r)). Please use x_trace(...) to grab the points from the trace."))
     end
     !haskey(tr[1].metadata, "simplex") && error("Trace does not contain simplex. To get a trace of the simplex, run optimize() with trace_simplex = true")
     [ state.metadata["simplex"] for state in tr ]
 end
 
-function simplex_value_trace(r::MultivariateOptimizationResults)
-    if !isa(r.method, NelderMead)
-        throw(ArgumentError("There are no simplex values involved in optimization using $(r.method). Please use f_trace(...) to grab the objective values from the trace."))
+function simplex_value_trace(r::Union{MultivariateOptimizationResults, IteratorState})
+    tr = trace(r)
+    if !isa(AbstractOptimizer(r), NelderMead)
+        throw(ArgumentError("There are no simplex values involved in optimization using $(AbstractOptimizer(r)). Please use f_trace(...) to grab the objective values from the trace."))
     end
     !haskey(tr[1].metadata, "simplex_values") && error("Trace does not contain objective values at the simplex. To get a trace of the simplex values, run optimize() with trace_simplex = true")
     [ state.metadata["simplex_values"] for state in tr ]
 end
 
-f_trace(r::OptimizationResults) = [ state.value for state in trace(r) ]
+f_trace(r::Union{OptimizationResults, IteratorState}) = [ state.value for state in trace(r) ]
 
 g_norm_trace(r::OptimizationResults) = error("g_norm_trace is not implemented for $(summary(r)).")
-g_norm_trace(r::MultivariateOptimizationResults) = [ state.g_norm for state in trace(r) ]
+g_norm_trace(r::Union{MultivariateOptimizationResults, IteratorState}) =
+    [ state.g_norm for state in trace(r) ]
 
 f_calls(r::OptimizationResults) = r.f_calls
 f_calls(d) = first(d.f_calls)
diff --git a/src/multivariate/optimize/interface.jl b/src/multivariate/optimize/interface.jl
index 3561a0f26..f4cc77550 100644
--- a/src/multivariate/optimize/interface.jl
+++ b/src/multivariate/optimize/interface.jl
@@ -54,16 +54,16 @@ promote_objtype(method::FirstOrderOptimizer, x, autodiff::Symbol, inplace::Bool
 promote_objtype(method::SecondOrderOptimizer, x, autodiff::Symbol, inplace::Bool, td::TwiceDifferentiable) = td
 
 # if no method or options are present
-function optimize(f, initial_x::AbstractArray; inplace = true, autodiff = :finite, kwargs...)
+function optimizing(f, initial_x::AbstractArray; inplace = true, autodiff = :finite, kwargs...)
     method = fallback_method(f)
     checked_kwargs, method = check_kwargs(kwargs, method)
     d = promote_objtype(method, initial_x, autodiff, inplace, f)
     add_default_opts!(checked_kwargs, method)
     options = Options(; checked_kwargs...)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(f, g, initial_x::AbstractArray; inplace = true, autodiff = :finite, kwargs...)
+function optimizing(f, g, initial_x::AbstractArray; inplace = true, autodiff = :finite, kwargs...)
     method = fallback_method(f, g)
     checked_kwargs, method = check_kwargs(kwargs, method)
@@ -71,9 +71,9 @@ function optimize(f, g, initial_x::AbstractArray; inplace = true, autodiff = :fi
     add_default_opts!(checked_kwargs, method)
     options = Options(; checked_kwargs...)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(f, g, h, initial_x::AbstractArray; inplace = true, autodiff = :finite, kwargs...)
+function optimizing(f, g, h, initial_x::AbstractArray; inplace = true, autodiff = :finite, kwargs...)
     method = fallback_method(f, g, h)
     checked_kwargs, method = check_kwargs(kwargs, method)
@@ -81,57 +81,67 @@ function optimize(f, g, h, initial_x::AbstractArray; inplace = true, autodiff =
     add_default_opts!(checked_kwargs, method)
     options = Options(; checked_kwargs...)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
 # no method supplied with objective
-function optimize(d::T, initial_x::AbstractArray, options::Options) where T<:AbstractObjective
-    optimize(d, initial_x, fallback_method(d), options)
+function optimizing(d::T, initial_x::AbstractArray, options::Options) where T<:AbstractObjective
+    optimizing(d, initial_x, fallback_method(d), options)
 end
 
 # no method supplied with inplace and autodiff keywords becauase objective is not supplied
-function optimize(f, initial_x::AbstractArray, options::Options; inplace = true, autodiff = :finite)
+function optimizing(f, initial_x::AbstractArray, options::Options; inplace = true, autodiff = :finite)
     method = fallback_method(f)
     d = promote_objtype(method, initial_x, autodiff, inplace, f)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(f, g, initial_x::AbstractArray, options::Options; inplace = true, autodiff = :finite)
+function optimizing(f, g, initial_x::AbstractArray, options::Options; inplace = true, autodiff = :finite)
     method = fallback_method(f, g)
     d = promote_objtype(method, initial_x, autodiff, inplace, f, g)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(f, g, h, initial_x::AbstractArray{T}, options::Options; inplace = true, autodiff = :finite) where {T}
+function optimizing(f, g, h, initial_x::AbstractArray{T}, options::Options; inplace = true, autodiff = :finite) where {T}
     method = fallback_method(f, g, h)
     d = promote_objtype(method, initial_x, autodiff, inplace, f, g, h)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
 # potentially everything is supplied (besides caches)
-function optimize(f, initial_x::AbstractArray, method::AbstractOptimizer,
+function optimizing(f, initial_x::AbstractArray, method::AbstractOptimizer,
                   options::Options = Options(;default_options(method)...); inplace = true, autodiff = :finite)
     d = promote_objtype(method, initial_x, autodiff, inplace, f)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(f, g, initial_x::AbstractArray, method::AbstractOptimizer,
+function optimizing(f, g, initial_x::AbstractArray, method::AbstractOptimizer,
                   options::Options = Options(;default_options(method)...); inplace = true, autodiff = :finite)
     d = promote_objtype(method, initial_x, autodiff, inplace, f, g)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(f, g, h, initial_x::AbstractArray{T}, method::AbstractOptimizer,
+function optimizing(f, g, h, initial_x::AbstractArray{T}, method::AbstractOptimizer,
                   options::Options = Options(;default_options(method)...); inplace = true, autodiff = :finite) where T
     d = promote_objtype(method, initial_x, autodiff, inplace, f, g, h)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
 end
 
-function optimize(d::D, initial_x::AbstractArray, method::SecondOrderOptimizer,
+function optimizing(d::D, initial_x::AbstractArray, method::SecondOrderOptimizer,
                   options::Options = Options(;default_options(method)...); autodiff = :finite, inplace = true) where {D <: Union{NonDifferentiable, OnceDifferentiable}}
     d = promote_objtype(method, initial_x, autodiff, inplace, d)
-    optimize(d, initial_x, method, options)
+    optimizing(d, initial_x, method, options)
+end
+
+function optimize(args...; kwargs...)
+    local istate
+    for istate′ in optimizing(args...; kwargs...)
+        istate = istate′
+    end
+    # We can safely assume that `istate` is defined at this point. That is to say,
+    # `OptimIterator` guarantees that `iterate(::OptimIterator) !== nothing`.
+    return OptimizationResults(istate)
 end
diff --git a/src/multivariate/optimize/optimize.jl b/src/multivariate/optimize/optimize.jl
index 29a35df91..6e1781833 100644
--- a/src/multivariate/optimize/optimize.jl
+++ b/src/multivariate/optimize/optimize.jl
@@ -27,36 +27,77 @@ function initial_convergence(d, state, method::AbstractOptimizer, initial_x, opt
 end
 initial_convergence(d, state, method::ZerothOrderOptimizer, initial_x, options) = false
 
-function optimize(d::D, initial_x::Tx, method::M,
-                  options::Options{T, TCallback} = Options(;default_options(method)...),
-                  state = initial_state(method, options, d, initial_x)) where {D<:AbstractObjective, M<:AbstractOptimizer, Tx <: AbstractArray, T, TCallback}
-    if length(initial_x) == 1 && typeof(method) <: NelderMead
-        error("You cannot use NelderMead for univariate problems. Alternatively, use either interval bound univariate optimization, or another method such as BFGS or Newton.")
-    end
+struct OptimIterator{D <: AbstractObjective, M <: AbstractOptimizer, Tx <: AbstractArray, O <: Options, S}
+    d::D
+    initial_x::Tx
+    method::M
+    options::O
+    state::S
+end
+
+Base.IteratorSize(::Type{<:OptimIterator}) = Base.SizeUnknown()
+Base.IteratorEltype(::Type{<:OptimIterator}) = Base.HasEltype()
+Base.eltype(::Type{<:OptimIterator}) = IteratorState
+
+@with_kw struct IteratorState{IT <: OptimIterator, TR <: OptimizationTrace}
+    # Put `OptimIterator` in iterator state so that `OptimizationResults` can
+    # be constructed from `IteratorState`.
+    iter::IT
+
+    t0::Float64
+    _time::Float64
+    tr::TR
+    tracing::Bool
+    stopped::Bool
+    stopped_by_callback::Bool
+    stopped_by_time_limit::Bool
+    f_limit_reached::Bool
+    g_limit_reached::Bool
+    h_limit_reached::Bool
+    x_converged::Bool
+    f_converged::Bool
+    f_increased::Bool
+    counter_f_tol::Int
+    g_converged::Bool
+    converged::Bool
+    iteration::Int
+    ls_success::Bool
+end
+
+function Base.iterate(iter::OptimIterator, istate = nothing)
+    @unpack d, initial_x, method, options, state = iter
+    if istate === nothing
+        t0 = time() # Initial time stamp used to control early stopping by options.time_limit
+        tr = OptimizationTrace{typeof(value(d)), typeof(method)}()
+        tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
+        stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
+        f_limit_reached, g_limit_reached, h_limit_reached = false, false, false
+        x_converged, f_converged, f_increased, counter_f_tol = false, false, false, 0
+
+        g_converged = initial_convergence(d, state, method, initial_x, options)
+        converged = g_converged
 
-    t0 = time() # Initial time stamp used to control early stopping by options.time_limit
-    tr = OptimizationTrace{typeof(value(d)), typeof(method)}()
-    tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
-    stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
-    f_limit_reached, g_limit_reached, h_limit_reached = false, false, false
-    x_converged, f_converged, f_increased, counter_f_tol = false, false, false, 0
+        # prepare iteration counter (used to make "initial state" trace entry)
+        iteration = 0
 
-    g_converged = initial_convergence(d, state, method, initial_x, options)
-    converged = g_converged
+        options.show_trace && print_header(method)
+        _time = time()
+        trace!(tr, d, state, iteration, method, options, _time-t0)
+        ls_success::Bool = true
+
+        # Note: `optimize` depends on that first iteration always yields something
+        # (i.e., `iterate` does _not_ return a `nothing` when `istate === nothing`).
+    else
+        @unpack_IteratorState istate
 
-    # prepare iteration counter (used to make "initial state" trace entry)
-    iteration = 0
+        !converged && !stopped && iteration < options.iterations || return nothing
 
-    options.show_trace && print_header(method)
-    _time = time()
-    trace!(tr, d, state, iteration, method, options, _time-t0)
-    ls_success::Bool = true
-
-    while !converged && !stopped && iteration < options.iterations
         iteration += 1
 
         ls_failed = update_state!(d, state, method)
         if !ls_success
-            break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS, or linesearch errors)
+            # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS, or linesearch errors)
+            return nothing
         end
 
         update_g!(d, state, method) # TODO: Should this be `update_fg!`?
@@ -86,19 +127,47 @@ function optimize(d::D, initial_x::Tx, method::M,
             stopped_by_time_limit || f_limit_reached || g_limit_reached || h_limit_reached
             stopped = true
         end
-    end # while
+    end
+
+    new_istate = IteratorState(
+        iter,
+        t0,
+        _time,
+        tr,
+        tracing,
+        stopped,
+        stopped_by_callback,
+        stopped_by_time_limit,
+        f_limit_reached,
+        g_limit_reached,
+        h_limit_reached,
+        x_converged,
+        f_converged,
+        f_increased,
+        counter_f_tol,
+        g_converged,
+        converged,
+        iteration,
+        ls_success,
+    )
+
+    return new_istate, new_istate
+end
+
+function OptimizationResults(istate::IteratorState)
+    @unpack_IteratorState istate
+    @unpack d, initial_x, method, options, state = iter
 
     after_while!(d, state, method, options)
 
-    # we can just check minimum, as we've earlier enforced same types/eltypes
-    # in variables besides the option settings
     Tf = typeof(value(d))
-    f_incr_pick = f_increased && !options.allow_f_increases
+    T = typeof(options.x_abstol)
+    Tx = typeof(initial_x)
 
     return MultivariateOptimizationResults{typeof(method),T,Tx,typeof(x_abschange(state)),Tf,typeof(tr), Bool}(method,
                                         initial_x,
-                                        pick_best_x(f_incr_pick, state),
-                                        pick_best_f(f_incr_pick, state, d),
+                                        minimizer(istate),
+                                        minimum(istate),
                                         iteration,
                                         iteration == options.iterations,
                                         x_converged,
@@ -124,3 +193,45 @@ function optimize(d::D, initial_x::Tx, method::M,
                                         _time-t0,
                                         )
 end
+
+function optimizing(d::D, initial_x::Tx, method::M,
+                    options::Options = Options(;default_options(method)...),
+                    state = initial_state(method, options, d, initial_x)) where {D<:AbstractObjective, M<:AbstractOptimizer, Tx <: AbstractArray}
+    if length(initial_x) == 1 && typeof(method) <: NelderMead
+        error("You cannot use NelderMead for univariate problems. Alternatively, use either interval bound univariate optimization, or another method such as BFGS or Newton.")
+    end
+    return OptimIterator(d, initial_x, method, options, state)
+end
+
+AbstractOptimizer(istate::IteratorState) = istate.iter.method
+
+# we can just check minimum, as we've earlier enforced same types/eltypes
+# in variables besides the option settings
+
+function minimizer(istate::IteratorState)
+    @unpack iter, f_increased = istate
+    @unpack options, state = iter
+    f_incr_pick = f_increased && !options.allow_f_increases
+    return pick_best_x(f_incr_pick, state)
+end
+
+function minimum(istate::IteratorState)
+    @unpack iter, f_increased = istate
+    @unpack d, options, state = iter
+    f_incr_pick = f_increased && !options.allow_f_increases
+    return pick_best_f(f_incr_pick, state, d)
+end
+
+iterations(istate::IteratorState) = istate.iteration
+iteration_limit_reached(istate::IteratorState) = istate.iteration == istate.iter.options.iterations
+trace(istate::IteratorState) = istate.tr
+
+converged(istate::IteratorState) = istate.converged
+x_converged(istate::IteratorState) = istate.x_converged
+f_converged(istate::IteratorState) = istate.f_converged
+g_converged(istate::IteratorState) = istate.g_converged
+initial_state(istate::IteratorState) = istate.iter.initial_x
+
+f_calls(istate::IteratorState) = f_calls(istate.iter.d)
+g_calls(istate::IteratorState) = g_calls(istate.iter.d)
+h_calls(istate::IteratorState) = h_calls(istate.iter.d)
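One workflow these accessors and the iterator make possible, and plain `optimize` does not, is caller-defined stopping. A minimal sketch; the starting point and the threshold below are arbitrary illustrative choices:

```jl
using Optim

rosenbrock(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2

res = let istate
    for istate′ in Optim.optimizing(rosenbrock, [-1.2, 1.0], BFGS())
        istate = istate′
        # Caller-defined stopping rule: leave the loop as soon as the objective
        # drops below a hand-picked threshold, independently of Optim's own checks.
        Optim.minimum(istate) < 1e-8 && break
    end
    # A results object can still be built from whatever state we stopped at.
    Optim.OptimizationResults(istate)
end

Optim.iterations(res), Optim.minimum(res)
```

Any state reached this way converts into a `MultivariateOptimizationResults` via `Optim.OptimizationResults`, so downstream code that expects an ordinary results object keeps working.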
diff --git a/test/general/api.jl b/test/general/api.jl
index 240ba0778..0d4546206 100644
--- a/test/general/api.jl
+++ b/test/general/api.jl
@@ -144,6 +144,32 @@ res_extended_nm = Optim.optimize(f, g!, initial_x, NelderMead(),
                                  options_extended_nm)
 @test haskey(Optim.trace(res_extended_nm)[1].metadata,"centroid")
 @test haskey(Optim.trace(res_extended_nm)[1].metadata,"step_type")
+
+local istate
+for istate′ in Optim.optimizing(f, initial_x, BFGS(),
+                                Optim.Options(extended_trace = true,
+                                              store_trace = true))
+    istate = istate′
+    break
+end
+# (smoke) tests for accessor functions:
+@test summary(istate) == "BFGS"
+@test Optim.minimizer(istate) == initial_x
+@test Optim.minimum(istate) == f(initial_x)
+@test Optim.iterations(istate) == 0
+@test Optim.iteration_limit_reached(istate) == false
+@test Optim.trace(istate) isa Vector{<:Optim.OptimizationState}
+@test Optim.x_trace(istate) == [initial_x]
+@test Optim.f_trace(istate) == [f(initial_x)]
+@test Optim.f_calls(istate) == 1
+@test Optim.converged(istate) == false
+@test Optim.g_norm_trace(istate) ≈ [215.6] rtol=1e-6
+@test Optim.g_calls(istate) == 1
+@test Optim.x_converged(istate) == false
+@test Optim.f_converged(istate) == false
+@test Optim.g_converged(istate) == false
+@test Optim.initial_state(istate) == initial_x
+@test Optim.OptimizationResults(istate) isa Optim.MultivariateOptimizationResults
 end
 
 # Test univariate API