From be180622284d840d8ae56c9137adbeb5a1c0bc2c Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 24 Apr 2024 14:26:40 +0200 Subject: [PATCH 1/4] Restructure value iteration to unify reachability and reward specs --- ext/cuda/value_iteration.jl | 5 - src/specification.jl | 25 ++++ src/synthesis.jl | 70 +---------- src/value_iteration.jl | 233 ++---------------------------------- 4 files changed, 41 insertions(+), 292 deletions(-) diff --git a/ext/cuda/value_iteration.jl b/ext/cuda/value_iteration.jl index dca77e4..22e5fd1 100644 --- a/ext/cuda/value_iteration.jl +++ b/ext/cuda/value_iteration.jl @@ -15,7 +15,6 @@ function IntervalMDP.step_imdp!( value_function::IntervalMDP.IMDPValueFunction; maximize, upper_bound, - discount = 1.0, ) where {R, VR <: AbstractVector{R}, MR <: CuSparseMatrixCSC{R}} ominmax!( ordering, @@ -26,7 +25,6 @@ function IntervalMDP.step_imdp!( ) value_function.action_values .= Transpose(value_function.prev_transpose * p) - rmul!(value_function.action_values, discount) V_per_state = CuVectorOfVector(stateptr, value_function.action_values, maximum(diff(stateptr))) @@ -36,7 +34,6 @@ function IntervalMDP.step_imdp!( @cuda blocks = blocks threads = threads extremum_vov_kernel!( value_function.cur, V_per_state, - discount, maximize, ) @@ -46,7 +43,6 @@ end function extremum_vov_kernel!( V::CuDeviceVector{Tv, A}, V_per_state::CuDeviceVectorOfVector{Tv, Ti, A}, - discount, maximize, ) where {Tv, Ti, A} j = blockIdx().x @@ -83,7 +79,6 @@ function extremum_vov_kernel!( i += blockDim().x end - V[j] *= discount j += gridDim().x end end diff --git a/src/specification.jl b/src/specification.jl index ec8e397..b673474 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -97,6 +97,14 @@ Super type for all reachability-like property. """ abstract type AbstractReachability <: Property end +function initialize!(value_function, prop::AbstractReachability) + value_function.prev[reach(prop)] .= 1 +end + +function postprocess!(value_function, prop::AbstractReachability) + value_function.cur[reach(prop)] .= 1 +end + """ FiniteTimeReachability{T <: Integer, VT <: AbstractVector{T}} @@ -202,6 +210,11 @@ A property of reachability that includes a set of states to avoid. """ abstract type AbstractReachAvoid <: AbstractReachability end +function postprocess!(value_function, prop::AbstractReachAvoid) + value_function.cur[reach(prop)] .= 1 + value_function.cur[avoid(prop)] .= 0 +end + """ FiniteTimeReachAvoid{T <: Integer, VT <: AbstractVector{T}} @@ -333,6 +346,15 @@ Super type for all reward specifications. """ abstract type AbstractReward{R <: Real} <: Property end +function initialize!(value_function, prop::AbstractReward) + value_function.prev .= reward(prop) +end + +function postprocess!(value_function, prop::AbstractReward) + rmul!(value_function.cur, discount(prop)) + value_function.cur += reward(prop) +end + function checkreward!(prop::AbstractReward, system::IntervalMarkovProcess) @assert length(reward(prop)) == num_states(system) @assert 0 < discount(prop) # Discount must be non-negative. 
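Taken together, these `initialize!`/`postprocess!` hooks are what let a single value-iteration driver serve both reachability and reward properties. The following is a minimal sketch of the intended loop, assuming the same names as in the patch; `step!` is only a stand-in for `step_imc!`/`step_imdp!`, not a function in the package.

```julia
# Illustrative sketch only, not part of the patch.
initialize!(value_function, spec)   # reachability: value set to 1 on the target set; reward: value seeded with the reward vector
step!(value_function)               # one robust Bellman backup over the interval ambiguity sets
postprocess!(value_function, spec)  # reachability: pin target (and avoid) states; reward: discount and add the immediate reward
k = 1

while !term_criteria(value_function.cur, k, lastdiff!(value_function))
    nextiteration!(value_function)
    step!(value_function)
    postprocess!(value_function, spec)
    k += 1
end
```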
@@ -474,6 +496,9 @@ Specification(prop::Property) = Specification(prop, Pessimistic) Specification(prop::Property, satisfaction::SatisfactionMode) = Specification(prop, satisfaction, Maximize) +initialize!(value_function, spec::Specification) = initialize!(value_function, system_property(spec)) +postprocess!(value_function, spec::Specification) = postprocess!(value_function, system_property(spec)) + function checkspecification!(spec::Specification, system) return checkproperty!(system_property(spec), system) end diff --git a/src/synthesis.jl b/src/synthesis.jl index b961dc3..11bf62e 100644 --- a/src/synthesis.jl +++ b/src/synthesis.jl @@ -24,65 +24,9 @@ end function extract_time_varying_policy( problem::Problem{M, S}, -) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReachability}} +) where {M <: IntervalMarkovDecisionProcess, S <: Specification} mdp = system(problem) spec = specification(problem) - prop = system_property(spec) - term_criteria = termination_criteria(spec) - upper_bound = satisfaction_mode(spec) == Optimistic - maximize = strategy_mode(spec) == Maximize - - prob = transition_prob(mdp) - maxactions = maximum(diff(stateptr(mdp))) - target = reach(prop) - - # It is more efficient to use allocate first and reuse across iterations - p = deepcopy(gap(prob)) # Deep copy as it may be a vector of vectors and we need sparse arrays to store the same indices - ordering = construct_ordering(p) - - value_function = IMDPValueFunction(problem) - initialize!(value_function, target, 1.0) - - policy = Vector{Vector{Int}}(undef, 1) - - step_policy = step_imdp_with_extract!( - ordering, - p, - mdp, - prob, - value_function; - maximize = maximize, - upper_bound = upper_bound, - ) - policy[1] = step_policy - k = 1 - - while !term_criteria(value_function.cur, k, lastdiff!(value_function)) - nextiteration!(value_function) - step_policy = step_imdp_with_extract!( - ordering, - p, - mdp, - prob, - value_function; - maximize = maximize, - upper_bound = upper_bound, - ) - push!(policy, step_policy) - k += 1 - end - - # The policy vector is built starting from the target state - # at the final time step, so we need to reverse it. 
- return reverse(policy) -end - -function extract_time_varying_policy( - problem::Problem{M, S}, -) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReward}} - mdp = system(problem) - spec = specification(problem) - prop = system_property(spec) term_criteria = termination_criteria(spec) upper_bound = satisfaction_mode(spec) == Optimistic maximize = strategy_mode(spec) == Maximize @@ -94,7 +38,7 @@ function extract_time_varying_policy( ordering = construct_ordering(p) value_function = IMDPValueFunction(problem) - initialize!(value_function, 1:num_states(mdp), reward(prop)) + initialize!(value_function, spec) policy = Vector{Vector{Int}}(undef, 1) @@ -106,10 +50,8 @@ function extract_time_varying_policy( value_function; maximize = maximize, upper_bound = upper_bound, - discount = discount(prop), ) - # Add immediate reward - value_function.cur += reward(prop) + postprocess!(value_function, spec) policy[1] = step_policy k = 1 @@ -123,10 +65,8 @@ function extract_time_varying_policy( value_function; maximize = maximize, upper_bound = upper_bound, - discount = discount(prop), ) - # Add immediate reward - value_function.cur += reward(prop) + postprocess!(value_function, spec) push!(policy, step_policy) k += 1 end @@ -144,7 +84,6 @@ function step_imdp_with_extract!( value_function; maximize, upper_bound, - discount = 1.0, ) sptr = stateptr(mdp) @@ -159,7 +98,6 @@ function step_imdp_with_extract!( optfun = maximize ? argmax : argmin value_function.action_values .= Transpose(value_function.prev_transpose * p) - rmul!(value_function.action_values, discount) # Copy to ensure that terminal states are assigned their first (and only) action. indices = sptr[1:num_states(mdp)] diff --git a/src/value_iteration.jl b/src/value_iteration.jl index 50f96f7..cbbfcf7 100644 --- a/src/value_iteration.jl +++ b/src/value_iteration.jl @@ -19,9 +19,9 @@ termination_criteria(prop, finitetime::Val{false}) = CovergenceCriteria(convergence_eps(prop)) """ - value_iteration(problem::Problem{<:IntervalMarkovChain, Specification{<:AbstractReachability}}) + value_iteration(problem::Problem{<:IntervalMarkovChain, <:Specification}) -Solve optimistic/pessimistic reachability and reach-avoid problems using value iteration for interval Markov chain. +Solve optimistic/pessimistic specification problems using value iteration for interval Markov chain. ### Examples @@ -52,115 +52,29 @@ V, k, residual = value_iteration(problem) """ function value_iteration( problem::Problem{M, S}, -) where {M <: IntervalMarkovChain, S <: Specification{<:AbstractReachability}} +) where {M <: IntervalMarkovChain, S <: Specification} mc = system(problem) spec = specification(problem) - prop = system_property(spec) term_criteria = termination_criteria(spec) upper_bound = satisfaction_mode(spec) == Optimistic prob = transition_prob(mc) - target = reach(prop) # It is more efficient to use allocate first and reuse across iterations p = deepcopy(gap(prob)) # Deep copy as it may be a vector of vectors and we need sparse arrays to store the same indices ordering = construct_ordering(p) value_function = IMCValueFunction(problem) - initialize!(value_function, target, 1.0) + initialize!(value_function, spec) step_imc!(ordering, p, prob, value_function; upper_bound = upper_bound) + postprocess!(value_function, spec) k = 1 while !term_criteria(value_function.cur, k, lastdiff!(value_function)) nextiteration!(value_function) step_imc!(ordering, p, prob, value_function; upper_bound = upper_bound) - - k += 1 - end - - # lastdiff! 
uses prev to store the latest difference - # and it is already computed from the condition in the loop - return value_function.cur, k, value_function.prev -end - -""" - value_iteration(problem::Problem{<:IntervalMarkovChain, Specification{<:AbstractReward}} - ) - -Find pessimistic/optimistic reward using value iteration for interval Markov chain. - -### Examples - -```jldoctest -prob = IntervalProbabilities(; - lower = [ - 0.0 0.5 0.0 - 0.1 0.3 0.0 - 0.2 0.1 1.0 - ], - upper = [ - 0.5 0.7 0.0 - 0.6 0.5 0.0 - 0.7 0.3 1.0 - ], -) - -mc = IntervalMarkovChain(prob, 1) - -reward = [2.0, 1.0, 0.0] -discount = 0.9 -time_horizon = 10 -prop = FiniteTimeReward(reward, discount, time_horizon) -spec = Specification(prop, Pessimistic) -problem = Problem(mc, spec) -V, k, residual = value_iteration(problem) -``` - -""" -function value_iteration( - problem::Problem{M, S}, -) where {M <: IntervalMarkovChain, S <: Specification{<:AbstractReward}} - mc = system(problem) - spec = specification(problem) - prop = system_property(spec) - term_criteria = termination_criteria(spec) - upper_bound = satisfaction_mode(spec) == Optimistic - - prob = transition_prob(mc) - - # It is more efficient to use allocate first and reuse across iterations - p = deepcopy(gap(prob)) # Deep copy as it may be a vector of vectors and we need sparse arrays to store the same indices - ordering = construct_ordering(p) - - value_function = IMCValueFunction(problem) - initialize!(value_function, 1:num_states(mc), reward(prop)) - - step_imc!( - ordering, - p, - prob, - value_function; - upper_bound = upper_bound, - discount = discount(prop), - ) - # Add immediate reward - value_function.cur += reward(prop) - k = 1 - - while !term_criteria(value_function.cur, k, lastdiff!(value_function)) - nextiteration!(value_function) - step_imc!( - ordering, - p, - prob, - value_function; - upper_bound = upper_bound, - discount = discount(prop), - ) - - # Add immediate reward - value_function.cur += reward(prop) + postprocess!(value_function, spec) k += 1 end @@ -192,13 +106,6 @@ function IMCValueFunction( return IMCValueFunction(prev, Transpose(prev), cur) end -function initialize!(V, indices, values) - view(V.prev, indices) .= values - view(V.cur, indices) .= values - - return V -end - function lastdiff!(V) # Reuse prev to store the latest difference V.prev .-= V.cur @@ -219,20 +126,17 @@ function step_imc!( prob::IntervalProbabilities, value_function::IMCValueFunction; upper_bound, - discount = 1.0, ) ominmax!(ordering, p, prob, value_function.prev; max = upper_bound) - value_function.cur .= Transpose(value_function.prev_transpose * p) - rmul!(value_function.cur, discount) return value_function end """ - value_iteration(problem::Problem{<:IntervalMarkovDecisionProcess, Specification{<:AbstractReachability}}) + value_iteration(problem::Problem{<:IntervalMarkovDecisionProcess, <:Specification}) -Solve minimizes/mazimizes optimistic/pessimistic reachability and reach-avoid problems using value iteration for interval Markov decision processes. +Solve minimizes/mazimizes optimistic/pessimistic specification problems using value iteration for interval Markov decision processes. 
### Examples @@ -283,116 +187,9 @@ V, k, residual = value_iteration(problem) """ function value_iteration( problem::Problem{M, S}, -) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReachability}} +) where {M <: IntervalMarkovDecisionProcess, S <: Specification} mdp = system(problem) spec = specification(problem) - prop = system_property(spec) - term_criteria = termination_criteria(spec) - upper_bound = satisfaction_mode(spec) == Optimistic - maximize = strategy_mode(spec) == Maximize - - prob = transition_prob(mdp) - sptr = stateptr(mdp) - maxactions = maximum(diff(sptr)) - target = reach(prop) - - # It is more efficient to use allocate first and reuse across iterations - p = deepcopy(gap(prob)) # Deep copy as it may be a vector of vectors and we need sparse arrays to store the same indices - ordering = construct_ordering(p) - - value_function = IMDPValueFunction(problem) - initialize!(value_function, target, 1.0) - - step_imdp!( - ordering, - p, - prob, - sptr, - value_function; - maximize = maximize, - upper_bound = upper_bound, - ) - k = 1 - - while !term_criteria(value_function.cur, k, lastdiff!(value_function)) - nextiteration!(value_function) - step_imdp!( - ordering, - p, - prob, - sptr, - value_function; - maximize = maximize, - upper_bound = upper_bound, - ) - - k += 1 - end - - # lastdiff! uses prev to store the latest difference - # and it is already computed from the condition in the loop - return value_function.cur, k, value_function.prev -end - -""" - value_iteration(problem::Problem{<:IntervalMarkovDecisionProcess, Specification{<:AbstractReward}}) - -Find optimistic/pessimistic reward using value iteration for interval Markov decision process. - -### Examples - -```jldoctest -prob1 = IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], -) - -prob2 = IntervalProbabilities(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], -) - -prob3 = IntervalProbabilities(; - lower = [0.0; 0.0; 1.0], - upper = [0.0; 0.0; 1.0] -) - -transition_probs = [["a1", "a2"] => prob1, ["a1", "a2"] => prob2, ["sinking"] => prob3] -initial_state = 1 -mdp = IntervalMarkovDecisionProcess(transition_probs, initial_state) - -reward = [2.0, 1.0, 0.0] -discount = 0.9 -time_horizon = 10 -prop = FiniteTimeReward(reward, discount, time_horizon) -spec = Specification(prop, Pessimistic, Maximize) -problem = Problem(mdp, spec) -V, k, residual = value_iteration(problem) -``` - -""" -function value_iteration( - problem::Problem{M, S}, -) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReward}} - mdp = system(problem) - spec = specification(problem) - prop = system_property(spec) term_criteria = termination_criteria(spec) upper_bound = satisfaction_mode(spec) == Optimistic maximize = strategy_mode(spec) == Maximize @@ -405,7 +202,7 @@ function value_iteration( ordering = construct_ordering(p) value_function = IMDPValueFunction(problem) - initialize!(value_function, 1:num_states(mdp), reward(prop)) + initialize!(value_function, spec) step_imdp!( ordering, @@ -415,10 +212,8 @@ function value_iteration( value_function; maximize = maximize, upper_bound = upper_bound, - discount = discount(prop), ) - # Add immediate reward - value_function.cur += reward(prop) + postprocess!(value_function, spec) k = 1 while !term_criteria(value_function.cur, k, lastdiff!(value_function)) @@ -431,10 +226,8 @@ function value_iteration( value_function; maximize = maximize, upper_bound 
= upper_bound, - discount = discount(prop), ) - # Add immediate reward - value_function.cur += reward(prop) + postprocess!(value_function, spec) k += 1 end @@ -477,7 +270,6 @@ function step_imdp!( value_function; maximize, upper_bound, - discount = 1.0, ) ominmax!( ordering, @@ -490,7 +282,6 @@ function step_imdp!( optfun = maximize ? maximum : minimum value_function.action_values .= Transpose(value_function.prev_transpose * p) - rmul!(value_function.action_values, discount) @inbounds for j in 1:num_target(prob) @inbounds s1 = stateptr[j] From 13a2ffbd840698e2fbdd48f443fc942c18825ce6 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 24 Apr 2024 14:27:16 +0200 Subject: [PATCH 2/4] Test that reachability is monotone --- test/cuda/vi.jl | 7 +++++++ test/sparse/vi.jl | 7 +++++++ test/vi.jl | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/test/cuda/vi.jl b/test/cuda/vi.jl index f9caa89..80b9f39 100644 --- a/test/cuda/vi.jl +++ b/test/cuda/vi.jl @@ -19,6 +19,13 @@ problem = Problem(mc, spec) V_fixed_it, k, _ = value_iteration(problem) @test k == 10 +prop = FiniteTimeReachability([3], 11) +spec = Specification(prop, Pessimistic) +problem = Problem(mc, spec) +V_fixed_it2, k, _ = value_iteration(problem) +@test k == 11 +@test all(V_fixed_it .<= V_fixed_it2) + prop = InfiniteTimeReachability([3], 1e-6) spec = Specification(prop, Pessimistic) problem = Problem(mc, spec) diff --git a/test/sparse/vi.jl b/test/sparse/vi.jl index 9cf5b7a..cd904d0 100644 --- a/test/sparse/vi.jl +++ b/test/sparse/vi.jl @@ -19,6 +19,13 @@ problem = Problem(mc, spec) V_fixed_it, k, _ = value_iteration(problem) @test k == 10 +prop = FiniteTimeReachability([3], 11) +spec = Specification(prop, Pessimistic) +problem = Problem(mc, spec) +V_fixed_it2, k, _ = value_iteration(problem) +@test k == 11 +@test all(V_fixed_it .<= V_fixed_it2) + prop = InfiniteTimeReachability([3], 1e-6) spec = Specification(prop, Pessimistic) problem = Problem(mc, spec) diff --git a/test/vi.jl b/test/vi.jl index 2aae4aa..abd7164 100644 --- a/test/vi.jl +++ b/test/vi.jl @@ -22,6 +22,13 @@ problem = Problem(mc, spec) V_fixed_it, k, _ = value_iteration(problem) @test k == 10 +prop = FiniteTimeReachability([3], 11) +spec = Specification(prop, Pessimistic) +problem = Problem(mc, spec) +V_fixed_it2, k, _ = value_iteration(problem) +@test k == 11 +@test all(V_fixed_it .<= V_fixed_it2) + prop = InfiniteTimeReachability([3], 1e-6) spec = Specification(prop, Pessimistic) problem = Problem(mc, spec) From 84a97b3e795259293e0284e3d5409e1488d745a6 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 24 Apr 2024 20:46:21 +0200 Subject: [PATCH 3/4] Efficient probability check --- src/interval_probabilities.jl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/interval_probabilities.jl b/src/interval_probabilities.jl index 2ba4b5a..6eab058 100644 --- a/src/interval_probabilities.jl +++ b/src/interval_probabilities.jl @@ -47,10 +47,7 @@ end # Constructor from lower and gap with sanity assertions function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} - @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." - @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." - @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1." - @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." 
+ checkprobabilities!(lower, gap) sum_lower = vec(sum(lower; dims = 1)) @@ -65,6 +62,20 @@ function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatri return IntervalProbabilities(lower, gap, sum_lower) end +function checkprobabilities!(lower::AbstractMatrix, gap::AbstractMatrix) + @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." + @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." + @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1." + @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." +end + +function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) + @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative." + @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative." + @assert all(nonzeros(gap) .<= 1) "The gap transition probabilities must be less than or equal to 1." + @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." +end + # Keyword constructor from lower and upper function IntervalProbabilities(; lower::MR, upper::MR) where {MR <: AbstractMatrix} lower, gap = compute_gap(lower, upper) From 3f95d730b1cb9feacded0cfce2ece4d61423ee54 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 24 Apr 2024 20:50:33 +0200 Subject: [PATCH 4/4] Format --- ext/cuda/value_iteration.jl | 8 +------- src/Data/bmdp-tool.jl | 4 +++- src/specification.jl | 6 ++++-- src/synthesis.jl | 10 ++-------- src/value_iteration.jl | 33 ++++++++++----------------------- 5 files changed, 20 insertions(+), 41 deletions(-) diff --git a/ext/cuda/value_iteration.jl b/ext/cuda/value_iteration.jl index 22e5fd1..1a6f898 100644 --- a/ext/cuda/value_iteration.jl +++ b/ext/cuda/value_iteration.jl @@ -16,13 +16,7 @@ function IntervalMDP.step_imdp!( maximize, upper_bound, ) where {R, VR <: AbstractVector{R}, MR <: CuSparseMatrixCSC{R}} - ominmax!( - ordering, - p, - prob, - value_function.prev; - max = upper_bound, - ) + ominmax!(ordering, p, prob, value_function.prev; max = upper_bound) value_function.action_values .= Transpose(value_function.prev_transpose * p) diff --git a/src/Data/bmdp-tool.jl b/src/Data/bmdp-tool.jl index 0372d04..591498c 100644 --- a/src/Data/bmdp-tool.jl +++ b/src/Data/bmdp-tool.jl @@ -180,7 +180,9 @@ function write_bmdp_tool_file( end end -function write_bmdp_tool_file(path, mdp::IntervalMarkovChain, +function write_bmdp_tool_file( + path, + mdp::IntervalMarkovChain, terminal_states::Vector{T}, ) where {T <: Integer} prob = transition_prob(mdp) diff --git a/src/specification.jl b/src/specification.jl index ee07be6..3c91050 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -493,8 +493,10 @@ Specification(prop::Property) = Specification(prop, Pessimistic) Specification(prop::Property, satisfaction::SatisfactionMode) = Specification(prop, satisfaction, Maximize) -initialize!(value_function, spec::Specification) = initialize!(value_function, system_property(spec)) -postprocess!(value_function, spec::Specification) = postprocess!(value_function, system_property(spec)) +initialize!(value_function, spec::Specification) = + initialize!(value_function, system_property(spec)) +postprocess!(value_function, spec::Specification) = + postprocess!(value_function, system_property(spec)) function 
checkspecification!(spec::Specification, system) return checkproperty!(system_property(spec), system) diff --git a/src/synthesis.jl b/src/synthesis.jl index 11bf62e..a748bea 100644 --- a/src/synthesis.jl +++ b/src/synthesis.jl @@ -87,13 +87,7 @@ function step_imdp_with_extract!( ) sptr = stateptr(mdp) - ominmax!( - ordering, - p, - prob, - value_function.prev; - max = upper_bound, - ) + ominmax!(ordering, p, prob, value_function.prev; max = upper_bound) optfun = maximize ? argmax : argmin @@ -107,7 +101,7 @@ function step_imdp_with_extract!( @inbounds s2 = sptr[j + 1] @inbounds indices[j] = - optfun(view(value_function.action_values, s1:(s2 - 1))) + s1 - 1 + optfun(view(value_function.action_values, s1:(s2 - 1))) + s1 - 1 @inbounds value_function.cur[j] = value_function.action_values[indices[j]] end diff --git a/src/value_iteration.jl b/src/value_iteration.jl index cbbfcf7..2a885ef 100644 --- a/src/value_iteration.jl +++ b/src/value_iteration.jl @@ -90,14 +90,12 @@ function construct_value_function(::AbstractMatrix{R}, num_states) where {R} end mutable struct IMCValueFunction - prev - prev_transpose - cur + prev::Any + prev_transpose::Any + cur::Any end -function IMCValueFunction( - problem::Problem{M, S}, -) where {M <: IntervalMarkovChain, S} +function IMCValueFunction(problem::Problem{M, S}) where {M <: IntervalMarkovChain, S} mc = system(problem) prev = construct_value_function(gap(transition_prob(mc)), num_states(mc)) @@ -238,10 +236,10 @@ function value_iteration( end mutable struct IMDPValueFunction - prev - prev_transpose - cur - action_values + prev::Any + prev_transpose::Any + cur::Any + action_values::Any end function IMDPValueFunction( @@ -254,12 +252,7 @@ function IMDPValueFunction( action_values = similar(prev, num_choices(mdp)) - return IMDPValueFunction( - prev, - Transpose(prev), - cur, - action_values - ) + return IMDPValueFunction(prev, Transpose(prev), cur, action_values) end function step_imdp!( @@ -271,13 +264,7 @@ function step_imdp!( maximize, upper_bound, ) - ominmax!( - ordering, - p, - prob, - value_function.prev; - max = upper_bound, - ) + ominmax!(ordering, p, prob, value_function.prev; max = upper_bound) optfun = maximize ? maximum : minimum
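End to end, the unification means that a reachability property and a reward property now flow through the same `value_iteration` entry point. Below is a condensed usage sketch reusing the model and property constructors from the docstring examples in the patch; it is illustrative only, not a doctest.

```julia
prob = IntervalProbabilities(;
    lower = [0.0 0.5 0.0; 0.1 0.3 0.0; 0.2 0.1 1.0],
    upper = [0.5 0.7 0.0; 0.6 0.5 0.0; 0.7 0.3 1.0],
)
mc = IntervalMarkovChain(prob, 1)

# Reachability: initialize! sets the value to 1 on the target set,
# postprocess! keeps the target absorbing after every Bellman step.
spec = Specification(FiniteTimeReachability([3], 10), Pessimistic)
V_reach, k, residual = value_iteration(Problem(mc, spec))

# Discounted reward: initialize! seeds the value with the reward vector,
# postprocess! discounts and adds the immediate reward after every step.
spec = Specification(FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10), Pessimistic)
V_reward, k, residual = value_iteration(Problem(mc, spec))
```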