Unify value function #36

Merged (5 commits, Apr 24, 2024)

Changes from all commits
13 changes: 1 addition & 12 deletions ext/cuda/value_iteration.jl
@@ -15,18 +15,10 @@ function IntervalMDP.step_imdp!(
value_function::IntervalMDP.IMDPValueFunction;
maximize,
upper_bound,
- discount = 1.0,
) where {R, VR <: AbstractVector{R}, MR <: CuSparseMatrixCSC{R}}
- ominmax!(
- ordering,
- p,
- prob,
- value_function.prev;
- max = upper_bound,
- )
+ ominmax!(ordering, p, prob, value_function.prev; max = upper_bound)

value_function.action_values .= Transpose(value_function.prev_transpose * p)
- rmul!(value_function.action_values, discount)

V_per_state =
CuVectorOfVector(stateptr, value_function.action_values, maximum(diff(stateptr)))
@@ -36,7 +28,6 @@ function IntervalMDP.step_imdp!(
@cuda blocks = blocks threads = threads extremum_vov_kernel!(
value_function.cur,
V_per_state,
- discount,
maximize,
)

@@ -46,7 +37,6 @@ end
function extremum_vov_kernel!(
V::CuDeviceVector{Tv, A},
V_per_state::CuDeviceVectorOfVector{Tv, Ti, A},
- discount,
maximize,
) where {Tv, Ti, A}
j = blockIdx().x
@@ -83,7 +73,6 @@ function extremum_vov_kernel!(
i += blockDim().x
end

- V[j] *= discount
j += gridDim().x
end
end
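
With the discount factor removed from the GPU kernel, discounting and reward accumulation now happen on the host through the `postprocess!` hook introduced in src/specification.jl below. A minimal sketch of the resulting per-iteration flow, reusing the helper names from src/synthesis.jl and abbreviating the `step_imdp!` argument list (an illustration, not the exact call):

```julia
# Sketch only: the same loop serves reachability and reward specifications,
# because all property-specific work lives in initialize!/postprocess!.
initialize!(value_function, spec)    # e.g. prev[reach(prop)] .= 1, or prev .= reward(prop)

step_imdp!(ordering, p, prob, stateptr, value_function; maximize = maximize, upper_bound = upper_bound)
postprocess!(value_function, spec)   # e.g. rmul!(cur, discount(prop)); cur += reward(prop)
k = 1

while !term_criteria(value_function.cur, k, lastdiff!(value_function))
    nextiteration!(value_function)
    step_imdp!(ordering, p, prob, stateptr, value_function; maximize = maximize, upper_bound = upper_bound)
    postprocess!(value_function, spec)
    k += 1
end
```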
4 changes: 3 additions & 1 deletion src/Data/bmdp-tool.jl
@@ -180,7 +180,9 @@ function write_bmdp_tool_file(
end
end

- function write_bmdp_tool_file(path, mdp::IntervalMarkovChain,
+ function write_bmdp_tool_file(
+ path,
+ mdp::IntervalMarkovChain,
terminal_states::Vector{T},
) where {T <: Integer}
prob = transition_prob(mdp)
19 changes: 15 additions & 4 deletions src/interval_probabilities.jl
@@ -47,10 +47,7 @@ end

# Constructor from lower and gap with sanity assertions
function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}}
- @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative."
- @assert all(gap .>= 0) "The gap transition probabilities must be non-negative."
- @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1."
- @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+ checkprobabilities!(lower, gap)

sum_lower = vec(sum(lower; dims = 1))

@@ -65,6 +62,20 @@ function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}}
return IntervalProbabilities(lower, gap, sum_lower)
end

+ function checkprobabilities!(lower::AbstractMatrix, gap::AbstractMatrix)
+ @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative."
+ @assert all(gap .>= 0) "The gap transition probabilities must be non-negative."
+ @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1."
+ @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+ end
+
+ function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix)
+ @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative."
+ @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative."
+ @assert all(nonzeros(gap) .<= 1) "The gap transition probabilities must be less than or equal to 1."
+ @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+ end
+
# Keyword constructor from lower and upper
function IntervalProbabilities(; lower::MR, upper::MR) where {MR <: AbstractMatrix}
lower, gap = compute_gap(lower, upper)
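For reference, a hedged sketch of how the refactored validation is exercised through the keyword constructor shown above. The numeric bounds are made up, and the sparse case assumes `lower` and `upper` share a sparsity pattern so that the `nonzeros`-based checks line up:

```julia
using SparseArrays

# Dense bounds: validated elementwise by checkprobabilities!(::AbstractMatrix, ::AbstractMatrix).
lower = [0.0 0.5; 0.1 0.3; 0.2 0.1]
upper = [0.5 0.7; 0.6 0.5; 0.7 0.3]
prob_dense = IntervalProbabilities(; lower = lower, upper = upper)

# Sparse bounds: hit the AbstractSparseMatrix method, which inspects only the stored
# nonzeros and therefore never densifies the matrices.
lower_sp = sparse([0.1 0.0; 0.2 0.5; 0.0 0.3])
upper_sp = sparse([0.6 0.0; 0.7 0.6; 0.0 0.5])
prob_sparse = IntervalProbabilities(; lower = lower_sp, upper = upper_sp)
```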
27 changes: 27 additions & 0 deletions src/specification.jl
@@ -97,6 +97,14 @@ Super type for all reachability-like property.
"""
abstract type AbstractReachability <: Property end

+ function initialize!(value_function, prop::AbstractReachability)
+ value_function.prev[reach(prop)] .= 1
+ end
+
+ function postprocess!(value_function, prop::AbstractReachability)
+ value_function.cur[reach(prop)] .= 1
+ end
+
"""
FiniteTimeReachability{T <: Integer, VT <: AbstractVector{T}}

@@ -202,6 +210,11 @@ A property of reachability that includes a set of states to avoid.
"""
abstract type AbstractReachAvoid <: AbstractReachability end

+ function postprocess!(value_function, prop::AbstractReachAvoid)
+ value_function.cur[reach(prop)] .= 1
+ value_function.cur[avoid(prop)] .= 0
+ end
+
"""
FiniteTimeReachAvoid{T <: Integer, VT <: AbstractVector{T}}

@@ -330,6 +343,15 @@ Super type for all reward specifications.
"""
abstract type AbstractReward{R <: Real} <: Property end

+ function initialize!(value_function, prop::AbstractReward)
+ value_function.prev .= reward(prop)
+ end
+
+ function postprocess!(value_function, prop::AbstractReward)
+ rmul!(value_function.cur, discount(prop))
+ value_function.cur += reward(prop)
+ end
+
function checkreward!(prop::AbstractReward, system::IntervalMarkovProcess)
@assert length(reward(prop)) == num_states(system)
@assert 0 < discount(prop) # Discount must be non-negative.
@@ -471,6 +493,11 @@ Specification(prop::Property) = Specification(prop, Pessimistic)
Specification(prop::Property, satisfaction::SatisfactionMode) =
Specification(prop, satisfaction, Maximize)

+ initialize!(value_function, spec::Specification) =
+ initialize!(value_function, system_property(spec))
+ postprocess!(value_function, spec::Specification) =
+ postprocess!(value_function, system_property(spec))
+
function checkspecification!(spec::Specification, system)
return checkproperty!(system_property(spec), system)
end
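Because the value function is now driven entirely through `initialize!` and `postprocess!`, a new property type only has to supply those two methods. A hypothetical illustration (the type, its fields, and its `reach` method are invented here; the rest of the property interface, such as termination handling, is omitted):

```julia
# Hypothetical property: reachability with a partial credit on the target states.
# Not part of the package; shown only to illustrate the hook interface.
struct WeightedReachability{VT <: AbstractVector{<:Integer}} <: AbstractReachability
    reach::VT
    weight::Float64
end

reach(prop::WeightedReachability) = prop.reach

function initialize!(value_function, prop::WeightedReachability)
    value_function.prev[reach(prop)] .= prop.weight
end

function postprocess!(value_function, prop::WeightedReachability)
    value_function.cur[reach(prop)] .= prop.weight
end
```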
80 changes: 6 additions & 74 deletions src/synthesis.jl
@@ -24,65 +24,9 @@ end

function extract_time_varying_policy(
problem::Problem{M, S},
- ) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReachability}}
+ ) where {M <: IntervalMarkovDecisionProcess, S <: Specification}
mdp = system(problem)
spec = specification(problem)
- prop = system_property(spec)
- term_criteria = termination_criteria(spec)
- upper_bound = satisfaction_mode(spec) == Optimistic
- maximize = strategy_mode(spec) == Maximize
-
- prob = transition_prob(mdp)
- maxactions = maximum(diff(stateptr(mdp)))
- target = reach(prop)
-
- # It is more efficient to use allocate first and reuse across iterations
- p = deepcopy(gap(prob)) # Deep copy as it may be a vector of vectors and we need sparse arrays to store the same indices
- ordering = construct_ordering(p)
-
- value_function = IMDPValueFunction(problem)
- initialize!(value_function, target, 1.0)
-
- policy = Vector{Vector{Int}}(undef, 1)
-
- step_policy = step_imdp_with_extract!(
- ordering,
- p,
- mdp,
- prob,
- value_function;
- maximize = maximize,
- upper_bound = upper_bound,
- )
- policy[1] = step_policy
- k = 1
-
- while !term_criteria(value_function.cur, k, lastdiff!(value_function))
- nextiteration!(value_function)
- step_policy = step_imdp_with_extract!(
- ordering,
- p,
- mdp,
- prob,
- value_function;
- maximize = maximize,
- upper_bound = upper_bound,
- )
- push!(policy, step_policy)
- k += 1
- end
-
- # The policy vector is built starting from the target state
- # at the final time step, so we need to reverse it.
- return reverse(policy)
- end
-
- function extract_time_varying_policy(
- problem::Problem{M, S},
- ) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReward}}
- mdp = system(problem)
- spec = specification(problem)
- prop = system_property(spec)
term_criteria = termination_criteria(spec)
upper_bound = satisfaction_mode(spec) == Optimistic
maximize = strategy_mode(spec) == Maximize
@@ -94,7 +38,7 @@ function extract_time_varying_policy(
ordering = construct_ordering(p)

value_function = IMDPValueFunction(problem)
- initialize!(value_function, 1:num_states(mdp), reward(prop))
+ initialize!(value_function, spec)

policy = Vector{Vector{Int}}(undef, 1)

@@ -106,10 +50,8 @@ function extract_time_varying_policy(
value_function;
maximize = maximize,
upper_bound = upper_bound,
- discount = discount(prop),
)
- # Add immediate reward
- value_function.cur += reward(prop)
+ postprocess!(value_function, spec)
policy[1] = step_policy
k = 1

@@ -123,10 +65,8 @@ function extract_time_varying_policy(
value_function;
maximize = maximize,
upper_bound = upper_bound,
- discount = discount(prop),
)
- # Add immediate reward
- value_function.cur += reward(prop)
+ postprocess!(value_function, spec)
push!(policy, step_policy)
k += 1
end
@@ -144,22 +84,14 @@ function step_imdp_with_extract!(
value_function;
maximize,
upper_bound,
- discount = 1.0,
)
sptr = stateptr(mdp)

- ominmax!(
- ordering,
- p,
- prob,
- value_function.prev;
- max = upper_bound,
- )
+ ominmax!(ordering, p, prob, value_function.prev; max = upper_bound)

optfun = maximize ? argmax : argmin

value_function.action_values .= Transpose(value_function.prev_transpose * p)
- rmul!(value_function.action_values, discount)

# Copy to ensure that terminal states are assigned their first (and only) action.
indices = sptr[1:num_states(mdp)]
@@ -169,7 +101,7 @@ function step_imdp_with_extract!(
@inbounds s2 = sptr[j + 1]

@inbounds indices[j] =
- optfun(view(value_function.action_values, s1:(s2 - 1))) + s1 - 1
+ optfun(view(value_function.action_values, s1:(s2 - 1))) + s1 - 1
@inbounds value_function.cur[j] = value_function.action_values[indices[j]]
end

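With the two `extract_time_varying_policy` methods collapsed into one, reachability and reward problems follow the same code path. A hedged end-to-end sketch, where the `FiniteTimeReward` and `Problem` constructor signatures are assumptions and `mdp` stands for an existing `IntervalMarkovDecisionProcess`:

```julia
# Assumed constructor signatures; values are illustrative only.
prop = FiniteTimeReward([0.0, 0.0, 1.0], 0.95, 10)   # per-state reward, discount, horizon
spec = Specification(prop, Pessimistic, Maximize)
problem = Problem(mdp, spec)

# One method now covers Specification{<:AbstractReachability} and
# Specification{<:AbstractReward}; discounting is applied inside postprocess!.
policy = extract_time_varying_policy(problem)
```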