Unify value function #36

Merged (5 commits, Apr 24, 2024)

Changes from all commits
13 changes: 1 addition & 12 deletions ext/cuda/value_iteration.jl
@@ -15,18 +15,10 @@ function IntervalMDP.step_imdp!(
value_function::IntervalMDP.IMDPValueFunction;
maximize,
upper_bound,
- discount = 1.0,
) where {R, VR <: AbstractVector{R}, MR <: CuSparseMatrixCSC{R}}
- ominmax!(
- ordering,
- p,
- prob,
- value_function.prev;
- max = upper_bound,
- )
+ ominmax!(ordering, p, prob, value_function.prev; max = upper_bound)

value_function.action_values .= Transpose(value_function.prev_transpose * p)
- rmul!(value_function.action_values, discount)

V_per_state =
CuVectorOfVector(stateptr, value_function.action_values, maximum(diff(stateptr)))
@@ -36,7 +28,6 @@ function IntervalMDP.step_imdp!(
@cuda blocks = blocks threads = threads extremum_vov_kernel!(
value_function.cur,
V_per_state,
- discount,
maximize,
)

@@ -46,7 +37,6 @@ end
function extremum_vov_kernel!(
V::CuDeviceVector{Tv, A},
V_per_state::CuDeviceVectorOfVector{Tv, Ti, A},
- discount,
maximize,
) where {Tv, Ti, A}
j = blockIdx().x
@@ -83,7 +73,6 @@ function extremum_vov_kernel!(
i += blockDim().x
end

- V[j] *= discount
j += gridDim().x
end
end
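
With the discount factor removed from the GPU kernel, discounting and reward accumulation now happen on the host through the `postprocess!` hook introduced in src/specification.jl below. A minimal sketch of the resulting per-iteration flow, reusing the helper names from src/synthesis.jl and abbreviating the `step_imdp!` argument list (an illustration, not the exact call):

```julia
# Sketch only: the same loop serves reachability and reward specifications,
# because all property-specific work lives in initialize!/postprocess!.
initialize!(value_function, spec)    # e.g. prev[reach(prop)] .= 1, or prev .= reward(prop)

step_imdp!(ordering, p, prob, stateptr, value_function; maximize = maximize, upper_bound = upper_bound)
postprocess!(value_function, spec)   # e.g. rmul!(cur, discount(prop)); cur += reward(prop)
k = 1

while !term_criteria(value_function.cur, k, lastdiff!(value_function))
    nextiteration!(value_function)
    step_imdp!(ordering, p, prob, stateptr, value_function; maximize = maximize, upper_bound = upper_bound)
    postprocess!(value_function, spec)
    k += 1
end
```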
4 changes: 3 additions & 1 deletion src/Data/bmdp-tool.jl
@@ -180,7 +180,9 @@ function write_bmdp_tool_file(
end
end

- function write_bmdp_tool_file(path, mdp::IntervalMarkovChain,
+ function write_bmdp_tool_file(
+ path,
+ mdp::IntervalMarkovChain,
terminal_states::Vector{T},
) where {T <: Integer}
prob = transition_prob(mdp)
19 changes: 15 additions & 4 deletions src/interval_probabilities.jl
@@ -47,10 +47,7 @@ end

# Constructor from lower and gap with sanity assertions
function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}}
- @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative."
- @assert all(gap .>= 0) "The gap transition probabilities must be non-negative."
- @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1."
- @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+ checkprobabilities!(lower, gap)

sum_lower = vec(sum(lower; dims = 1))

@@ -65,6 +62,20 @@ function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}}
return IntervalProbabilities(lower, gap, sum_lower)
end

+ function checkprobabilities!(lower::AbstractMatrix, gap::AbstractMatrix)
+ @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative."
+ @assert all(gap .>= 0) "The gap transition probabilities must be non-negative."
+ @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1."
+ @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+ end
+
+ function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix)
+ @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative."
+ @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative."
+ @assert all(nonzeros(gap) .<= 1) "The gap transition probabilities must be less than or equal to 1."
+ @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+ end
+
# Keyword constructor from lower and upper
function IntervalProbabilities(; lower::MR, upper::MR) where {MR <: AbstractMatrix}
lower, gap = compute_gap(lower, upper)
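For reference, a hedged sketch of how the refactored validation is exercised through the keyword constructor shown above. The numeric bounds are made up, and the sparse case assumes `lower` and `upper` share a sparsity pattern so that the `nonzeros`-based checks line up:

```julia
using SparseArrays

# Dense bounds: validated elementwise by checkprobabilities!(::AbstractMatrix, ::AbstractMatrix).
lower = [0.0 0.5; 0.1 0.3; 0.2 0.1]
upper = [0.5 0.7; 0.6 0.5; 0.7 0.3]
prob_dense = IntervalProbabilities(; lower = lower, upper = upper)

# Sparse bounds: hit the AbstractSparseMatrix method, which inspects only the stored
# nonzeros and therefore never densifies the matrices.
lower_sp = sparse([0.1 0.0; 0.2 0.5; 0.0 0.3])
upper_sp = sparse([0.6 0.0; 0.7 0.6; 0.0 0.5])
prob_sparse = IntervalProbabilities(; lower = lower_sp, upper = upper_sp)
```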
27 changes: 27 additions & 0 deletions src/specification.jl
@@ -97,6 +97,14 @@ Super type for all reachability-like property.
"""
abstract type AbstractReachability <: Property end

+ function initialize!(value_function, prop::AbstractReachability)
+ value_function.prev[reach(prop)] .= 1
+ end
+
+ function postprocess!(value_function, prop::AbstractReachability)
+ value_function.cur[reach(prop)] .= 1
+ end
+
"""
FiniteTimeReachability{T <: Integer, VT <: AbstractVector{T}}

@@ -202,6 +210,11 @@ A property of reachability that includes a set of states to avoid.
"""
abstract type AbstractReachAvoid <: AbstractReachability end

+ function postprocess!(value_function, prop::AbstractReachAvoid)
+ value_function.cur[reach(prop)] .= 1
+ value_function.cur[avoid(prop)] .= 0
+ end
+
"""
FiniteTimeReachAvoid{T <: Integer, VT <: AbstractVector{T}}

@@ -330,6 +343,15 @@ Super type for all reward specifications.
"""
abstract type AbstractReward{R <: Real} <: Property end

+ function initialize!(value_function, prop::AbstractReward)
+ value_function.prev .= reward(prop)
+ end
+
+ function postprocess!(value_function, prop::AbstractReward)
+ rmul!(value_function.cur, discount(prop))
+ value_function.cur += reward(prop)
+ end
+
function checkreward!(prop::AbstractReward, system::IntervalMarkovProcess)
@assert length(reward(prop)) == num_states(system)
@assert 0 < discount(prop) # Discount must be non-negative.
@@ -471,6 +493,11 @@ Specification(prop::Property) = Specification(prop, Pessimistic)
Specification(prop::Property, satisfaction::SatisfactionMode) =
Specification(prop, satisfaction, Maximize)

+ initialize!(value_function, spec::Specification) =
+ initialize!(value_function, system_property(spec))
+ postprocess!(value_function, spec::Specification) =
+ postprocess!(value_function, system_property(spec))
+
function checkspecification!(spec::Specification, system)
return checkproperty!(system_property(spec), system)
end
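Because the value function is now driven entirely through `initialize!` and `postprocess!`, a new property type only has to supply those two methods. A hypothetical illustration (the type, its fields, and its `reach` method are invented here; the rest of the property interface, such as termination handling, is omitted):

```julia
# Hypothetical property: reachability with a partial credit on the target states.
# Not part of the package; shown only to illustrate the hook interface.
struct WeightedReachability{VT <: AbstractVector{<:Integer}} <: AbstractReachability
    reach::VT
    weight::Float64
end

reach(prop::WeightedReachability) = prop.reach

function initialize!(value_function, prop::WeightedReachability)
    value_function.prev[reach(prop)] .= prop.weight
end

function postprocess!(value_function, prop::WeightedReachability)
    value_function.cur[reach(prop)] .= prop.weight
end
```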
80 changes: 6 additions & 74 deletions src/synthesis.jl
@@ -24,65 +24,9 @@ end

function extract_time_varying_policy(
problem::Problem{M, S},
- ) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReachability}}
+ ) where {M <: IntervalMarkovDecisionProcess, S <: Specification}
mdp = system(problem)
spec = specification(problem)
- prop = system_property(spec)
- term_criteria = termination_criteria(spec)
- upper_bound = satisfaction_mode(spec) == Optimistic
- maximize = strategy_mode(spec) == Maximize
-
- prob = transition_prob(mdp)
- maxactions = maximum(diff(stateptr(mdp)))
- target = reach(prop)
-
- # It is more efficient to use allocate first and reuse across iterations
- p = deepcopy(gap(prob)) # Deep copy as it may be a vector of vectors and we need sparse arrays to store the same indices
- ordering = construct_ordering(p)
-
- value_function = IMDPValueFunction(problem)
- initialize!(value_function, target, 1.0)
-
- policy = Vector{Vector{Int}}(undef, 1)
-
- step_policy = step_imdp_with_extract!(
- ordering,
- p,
- mdp,
- prob,
- value_function;
- maximize = maximize,
- upper_bound = upper_bound,
- )
- policy[1] = step_policy
- k = 1
-
- while !term_criteria(value_function.cur, k, lastdiff!(value_function))
- nextiteration!(value_function)
- step_policy = step_imdp_with_extract!(
- ordering,
- p,
- mdp,
- prob,
- value_function;
- maximize = maximize,
- upper_bound = upper_bound,
- )
- push!(policy, step_policy)
- k += 1
- end
-
- # The policy vector is built starting from the target state
- # at the final time step, so we need to reverse it.
- return reverse(policy)
- end
-
- function extract_time_varying_policy(
- problem::Problem{M, S},
- ) where {M <: IntervalMarkovDecisionProcess, S <: Specification{<:AbstractReward}}
- mdp = system(problem)
- spec = specification(problem)
- prop = system_property(spec)
term_criteria = termination_criteria(spec)
upper_bound = satisfaction_mode(spec) == Optimistic
maximize = strategy_mode(spec) == Maximize
@@ -94,7 +38,7 @@ function extract_time_varying_policy(
ordering = construct_ordering(p)

value_function = IMDPValueFunction(problem)
- initialize!(value_function, 1:num_states(mdp), reward(prop))
+ initialize!(value_function, spec)

policy = Vector{Vector{Int}}(undef, 1)

@@ -106,10 +50,8 @@ function extract_time_varying_policy(
value_function;
maximize = maximize,
upper_bound = upper_bound,
- discount = discount(prop),
)
- # Add immediate reward
- value_function.cur += reward(prop)
+ postprocess!(value_function, spec)
policy[1] = step_policy
k = 1

@@ -123,10 +65,8 @@ function extract_time_varying_policy(
value_function;
maximize = maximize,
upper_bound = upper_bound,
- discount = discount(prop),
)
- # Add immediate reward
- value_function.cur += reward(prop)
+ postprocess!(value_function, spec)
push!(policy, step_policy)
k += 1
end
@@ -144,22 +84,14 @@ function step_imdp_with_extract!(
value_function;
maximize,
upper_bound,
- discount = 1.0,
)
sptr = stateptr(mdp)

- ominmax!(
- ordering,
- p,
- prob,
- value_function.prev;
- max = upper_bound,
- )
+ ominmax!(ordering, p, prob, value_function.prev; max = upper_bound)

optfun = maximize ? argmax : argmin

value_function.action_values .= Transpose(value_function.prev_transpose * p)
- rmul!(value_function.action_values, discount)

# Copy to ensure that terminal states are assigned their first (and only) action.
indices = sptr[1:num_states(mdp)]
@@ -169,7 +101,7 @@ function step_imdp_with_extract!(
@inbounds s2 = sptr[j + 1]

@inbounds indices[j] =
- optfun(view(value_function.action_values, s1:(s2 - 1))) + s1 - 1
+ optfun(view(value_function.action_values, s1:(s2 - 1))) + s1 - 1
@inbounds value_function.cur[j] = value_function.action_values[indices[j]]
end

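With the two `extract_time_varying_policy` methods collapsed into one, reachability and reward problems follow the same code path. A hedged end-to-end sketch, where the `FiniteTimeReward` and `Problem` constructor signatures are assumptions and `mdp` stands for an existing `IntervalMarkovDecisionProcess`:

```julia
# Assumed constructor signatures; values are illustrative only.
prop = FiniteTimeReward([0.0, 0.0, 1.0], 0.95, 10)   # per-state reward, discount, horizon
spec = Specification(prop, Pessimistic, Maximize)
problem = Problem(mdp, spec)

# One method now covers Specification{<:AbstractReachability} and
# Specification{<:AbstractReward}; discounting is applied inside postprocess!.
policy = extract_time_varying_policy(problem)
```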