From afd3c4227cfee1c4b03654e545f7ceb43264954a Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 4 Dec 2024 16:28:54 +0100 Subject: [PATCH 1/3] Document ambiguity set structures --- docs/src/reference/systems.md | 42 +++++++++-- .../MixtureIntervalProbabilities.jl | 61 ++++++++++++++-- .../OrthogonalIntervalProbabilities.jl | 69 +++++++++++++++++-- 3 files changed, 153 insertions(+), 19 deletions(-) diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 5a0de81..27e7034 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -18,17 +18,45 @@ stateptr(mdp::MixtureIntervalMarkovDecisionProcess) ``` ## Probability representation + +### Interval ambiguity sets ```@docs IntervalProbabilities +lower(p::IntervalProbabilities) +lower(p::IntervalProbabilities, i, j) +upper(p::IntervalProbabilities) +upper(p::IntervalProbabilities, i, j) +gap(p::IntervalProbabilities) +gap(p::IntervalProbabilities, i, j) +sum_lower(p::IntervalProbabilities) +sum_lower(p::IntervalProbabilities, j) +num_source(p::IntervalProbabilities) +num_target(p::IntervalProbabilities) +axes_source(p::IntervalProbabilities) +``` + +### Marginal interval ambiguity sets +```@docs OrthogonalIntervalProbabilities +lower(p::OrthogonalIntervalProbabilities, l) +lower(p::OrthogonalIntervalProbabilities, l, i, j) +upper(p::OrthogonalIntervalProbabilities, l) +upper(p::OrthogonalIntervalProbabilities, l, i, j) +gap(p::OrthogonalIntervalProbabilities, l) +gap(p::OrthogonalIntervalProbabilities, l, i, j) +sum_lower(p::OrthogonalIntervalProbabilities, l) +sum_lower(p::OrthogonalIntervalProbabilities, l, j) +num_source(p::OrthogonalIntervalProbabilities) +num_target(p::OrthogonalIntervalProbabilities{1}) +axes_source(p::OrthogonalIntervalProbabilities) +``` + +### Mixtures of marginal interval ambiguity sets +```@docs MixtureIntervalProbabilities -lower -upper -gap -sum_lower -num_source -num_target -axes_source 
+num_source(p::MixtureIntervalProbabilities) +num_target(p::MixtureIntervalProbabilities) +axes_source(p::MixtureIntervalProbabilities) mixture_probs weighting_probs ``` diff --git a/src/probabilities/MixtureIntervalProbabilities.jl b/src/probabilities/MixtureIntervalProbabilities.jl index 2ba24ac..84521fd 100644 --- a/src/probabilities/MixtureIntervalProbabilities.jl +++ b/src/probabilities/MixtureIntervalProbabilities.jl @@ -1,15 +1,60 @@ """ MixtureIntervalProbabilities{N, P <: OrthogonalIntervalProbabilities, Q <: IntervalProbabilities} -A tuple of `OrthogonalIntervalProbabilities` transition probabilities that all share the same source states, or source/action pairs, and target states. +A tuple of `OrthogonalIntervalProbabilities` for independent transition probabilities in a mixture that all share +the same source/action pairs, and target states. See [OrthogonalIntervalProbabilities](@ref) for more information on the structure of the transition probabilities +for each model in the mixture. The mixture is weighted by an `IntervalProbabilities` ambiguity set, called `weighting_probs`. ### Fields -- `probs::NTuple{N, P}`: A tuple of `IntervalProbabilities` transition probabilities along each axis. -- `source_dims::NTuple{N, Int32}`: The dimensions of the orthogonal probabilities for the source axis. This is flattened to a single dimension for indexing. +- `mixture_probs::NTuple{N, P}`: A tuple of `OrthogonalIntervalProbabilities` transition probabilities along each axis. +- `weighting_probs::Q`: The weighting ambiguity set for the mixture. ### Examples -# TODO: Update example +Below is a simple example of a mixture of two `OrthogonalIntervalProbabilities` with one dimension and the same source/action pairs and target states, +and a weighting ambiguity set. 
```jldoctest +prob1 = OrthogonalIntervalProbabilities( + ( + IntervalProbabilities(; + lower = [ + 0.0 0.5 + 0.1 0.3 + 0.2 0.1 + ], + upper = [ + 0.5 0.7 + 0.6 0.5 + 0.7 0.3 + ], + ), + ), + (Int32(2),), +) +prob2 = OrthogonalIntervalProbabilities( + ( + IntervalProbabilities(; + lower = [ + 0.1 0.4 + 0.2 0.2 + 0.3 0.0 + ], + upper = [ + 0.4 0.6 + 0.5 0.4 + 0.6 0.2 + ], + ), + ), + (Int32(2),), +) +weighting_probs = IntervalProbabilities(; lower = [ + 0.3 0.5 + 0.4 0.3 +], upper = [ + 0.8 0.7 + 0.7 0.5 +]) +mixture_prob = MixtureIntervalProbabilities((prob1, prob2), weighting_probs) ``` """ struct MixtureIntervalProbabilities{ @@ -86,7 +131,7 @@ mixture_probs(p::MixtureIntervalProbabilities) = p.mixture_probs """ mixture_probs(p::MixtureIntervalProbabilities, k) -Return the tuple of `OrthogonalIntervalProbabilities` transition probabilities. +Return ``k``-th `OrthogonalIntervalProbabilities` transition probabilities. """ mixture_probs(p::MixtureIntervalProbabilities, k) = p.mixture_probs[k] @@ -104,7 +149,13 @@ Return the valid range of indices for the source states or source/action pairs. """ axes_source(p::MixtureIntervalProbabilities) = axes_source(first(p.mixture_probs)) +""" + num_target(p::MixtureIntervalProbabilities) + +Return the number of target states along each marginal. 
+""" num_target(p::MixtureIntervalProbabilities) = num_target(first(p.mixture_probs)) + stateptr(p::MixtureIntervalProbabilities) = stateptr(first(p.mixture_probs)) Base.ndims(p::MixtureIntervalProbabilities{N}) where {N} = N diff --git a/src/probabilities/OrthogonalIntervalProbabilities.jl b/src/probabilities/OrthogonalIntervalProbabilities.jl index 7904bc0..0b864a5 100644 --- a/src/probabilities/OrthogonalIntervalProbabilities.jl +++ b/src/probabilities/OrthogonalIntervalProbabilities.jl @@ -1,16 +1,65 @@ """ OrthogonalIntervalProbabilities{N, P <: IntervalProbabilities} -A tuple of `IntervalProbabilities` transition probabilities from all source states or source/action pairs to the -target states along each axis. +A tuple of `IntervalProbabilities` for (marginal) transition probabilities from all source/action pairs to the target states along each axis, +with target states/marginals on the rows and source states or source/action pairs on the columns. The source states are ordered in +a column-major order, i.e., the first axis of source states is the fastest, similar to the ordering of a multi-dimensional array in Julia. +E.g. for an `OrthogonalIntervalProbabilities` with `source_dims == (3, 3, 3)` and 2 actions for each source state ``\\{a_1, a_2\\}``, +the columns in order represent the collowing: + +```math + ((1, 1, 1), a_1), ((1, 1, 1), a_2), (2, 1, 1), a_1), ((2, 1, 1), a_2), ..., ((3, 3, 3), a_1), ((3, 3, 3), a_2). +``` + +The number of target states correspond to the number of rows in the transition probabilities of each axis. + ### Fields -- `probs::NTuple{N, P}`: A tuple of `IntervalProbabilities` transition probabilities along each axis. +- `probs::NTuple{N, P}`: A tuple of `IntervalProbabilities` for (marginal) transition probabilities along each axis. - `source_dims::NTuple{N, Int32}`: The dimensions of the orthogonal probabilities for the source axis. This is flattened to a single dimension for indexing. 
### Examples -# TODO: Update example +An example of OrthogonalIntervalProbabilities with 3 axes and 3 states for each axis, only one action per state. +Therefore, the `source_dims` is (3, 3, 3) and the number of columns of the transition probabilities is 27. + ```jldoctest +lower1 = [ + 1/15 3/10 1/15 3/10 1/30 1/3 7/30 4/15 1/6 1/5 1/10 1/5 0 7/30 7/30 1/5 2/15 1/6 1/10 1/30 1/10 1/15 1/10 1/15 4/15 4/15 1/3 + 1/5 4/15 1/10 1/5 3/10 3/10 1/10 1/15 3/10 3/10 7/30 1/5 1/10 1/5 1/5 1/30 1/5 3/10 1/5 1/5 1/10 1/30 4/15 1/10 1/5 1/6 7/30 + 4/15 1/30 1/5 1/5 7/30 4/15 2/15 7/30 1/5 1/3 2/15 1/6 1/6 1/3 4/15 3/10 1/30 3/10 3/10 1/10 1/15 1/30 2/15 1/6 1/5 1/10 4/15 +] +upper1 = [ + 7/15 17/30 13/30 3/5 17/30 17/30 17/30 13/30 3/5 2/3 11/30 7/15 0 1/2 17/30 13/30 7/15 13/30 17/30 13/30 2/5 2/5 2/3 2/5 17/30 2/5 19/30 + 8/15 1/2 3/5 7/15 8/15 17/30 2/3 17/30 11/30 7/15 19/30 19/30 13/15 1/2 17/30 13/30 3/5 11/30 8/15 7/15 7/15 13/30 8/15 2/5 8/15 17/30 3/5 + 11/30 1/3 2/5 8/15 7/15 3/5 2/3 17/30 2/3 8/15 2/15 3/5 2/3 3/5 17/30 2/3 7/15 8/15 2/5 2/5 11/30 17/30 17/30 1/2 2/5 19/30 13/30 +] +prob1 = IntervalProbabilities(; lower = lower1, upper = upper1) + +lower2 = [ + 1/10 1/15 3/10 0 1/6 1/15 1/15 1/6 1/6 1/30 1/10 1/10 1/3 2/15 3/10 4/15 2/15 2/15 1/6 7/30 1/15 2/15 1/10 1/3 7/30 1/30 7/30 + 3/10 1/5 3/10 2/15 0 1/30 0 1/15 1/30 7/30 1/30 1/15 7/30 1/15 1/6 1/30 1/10 1/15 3/10 0 3/10 1/6 3/10 1/5 0 7/30 2/15 + 3/10 4/15 1/10 3/10 2/15 1/3 3/10 1/10 1/6 3/10 7/30 1/6 1/15 1/15 1/10 1/5 1/5 4/15 1/15 1/3 2/15 1/15 1/5 1/5 1/15 7/30 1/15 +] +upper2 = [ + 2/5 17/30 3/5 11/30 3/5 7/15 19/30 2/5 3/5 2/3 2/3 8/15 8/15 19/30 8/15 8/15 13/30 13/30 13/30 17/30 17/30 13/30 11/30 19/30 8/15 2/5 8/15 + 1/3 13/30 11/30 2/5 2/3 2/3 0 13/30 1/2 17/30 17/30 1/3 2/5 1/3 13/30 11/30 8/15 1/3 1/2 8/15 8/15 8/15 8/15 2/5 3/5 2/3 13/30 + 17/30 3/5 8/15 1/2 7/15 1/2 2/3 17/30 11/30 2/5 1/2 7/15 2/5 17/30 11/30 2/5 11/30 2/3 1/3 2/3 17/30 8/15 17/30 3/5 2/5 19/30 11/30 +] +prob2 = 
IntervalProbabilities(; lower = lower2, upper = upper2) + +lower3 = [ + 4/15 1/5 3/10 3/10 4/15 7/30 1/5 4/15 7/30 1/6 1/5 0 1/15 1/30 3/10 1/3 2/15 1/15 7/30 4/15 1/10 1/3 1/5 7/30 1/30 1/5 7/30 + 2/15 4/15 1/10 1/30 7/30 2/15 1/15 1/30 3/10 1/3 1/5 1/10 2/15 1/30 2/15 4/15 0 4/15 1/5 4/15 1/10 1/10 1/3 7/30 3/10 1/3 3/10 + 1/5 1/3 3/10 1/10 1/15 1/10 1/30 1/5 2/15 7/30 1/3 2/15 1/10 1/6 3/10 1/5 7/30 1/30 0 1/30 1/15 2/15 1/6 7/30 4/15 4/15 7/30 +] +upper3 = [ + 3/5 17/30 1/2 3/5 19/30 2/5 8/15 1/3 11/30 2/5 17/30 13/30 2/5 3/5 3/5 11/30 1/2 11/30 2/3 17/30 3/5 7/15 19/30 1/2 3/5 1/3 19/30 + 3/5 2/3 13/30 19/30 1/3 2/5 17/30 7/15 11/30 3/5 19/30 7/15 2/5 8/15 17/30 11/30 19/30 13/30 2/3 17/30 8/15 13/30 13/30 3/5 1/2 8/15 8/15 + 3/5 2/3 1/2 1/2 2/3 7/15 3/5 3/5 1/2 1/3 2/5 8/15 2/5 11/30 1/3 8/15 7/15 13/30 0 2/5 11/30 19/30 19/30 2/5 1/2 7/15 7/15 +] +prob3 = IntervalProbabilities(; lower = lower3, upper = upper3) + +prob = OrthogonalIntervalProbabilities((prob1, prob2, prob3), (Int32(3), Int32(3), Int32(3))) ``` """ struct OrthogonalIntervalProbabilities{N, P <: IntervalProbabilities} <: @@ -41,7 +90,7 @@ end """ lower(p::OrthogonalIntervalProbabilities, l) -Return the lower bound transition probabilities from a source state or source/action pair to a target state. +Return the lower bound transition probabilities from a source state or source/action pair to a target axis. """ lower(p::OrthogonalIntervalProbabilities, l) = lower(p.probs[l]) @@ -77,7 +126,7 @@ upper(p::OrthogonalIntervalProbabilities, l, i, j) = upper(p.probs[l], i, j) """ gap(p::OrthogonalIntervalProbabilities, l) -Return the gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. +Return the gap between upper and lower bound transition probabilities from a source state or source/action pair to a target axis. 
""" gap(p::OrthogonalIntervalProbabilities, l) = gap(p.probs[l]) @@ -91,7 +140,7 @@ gap(p::OrthogonalIntervalProbabilities, l, i, j) = gap(p.probs[l], i, j) """ sum_lower(p::OrthogonalIntervalProbabilities, l) -Return the sum of lower bound transition probabilities from a source state or source/action pair to all target states. +Return the sum of lower bound transition probabilities from a source state or source/action pair to all target states on one axis. This is useful in efficiently implementing O-maximization, where we start with a lower bound probability assignment and iteratively, according to the ordering, adding the gap until the sum of probabilities is 1. """ @@ -121,8 +170,14 @@ Return the valid range of indices for the source states or source/action pairs. """ axes_source(p::OrthogonalIntervalProbabilities) = axes_source(first(p.probs)) +""" + num_target(p::OrthogonalIntervalProbabilities) + +Return the number of target states along each marginal. +""" num_target(p::OrthogonalIntervalProbabilities{N}) where {N} = ntuple(i -> num_target(p[i]), N) + stateptr(p::OrthogonalIntervalProbabilities) = UnitRange{Int32}(1, num_source(p) + 1) Base.ndims(p::OrthogonalIntervalProbabilities{N}) where {N} = N From e5f4ce0b10dd8695a5112ca9592ba53458c79a40 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 4 Dec 2024 17:15:55 +0100 Subject: [PATCH 2/3] Write documentation for models --- docs/src/reference/systems.md | 8 +- docs/src/theory.md | 8 +- src/models/IntervalMarkovDecisionProcess.jl | 17 ++-- .../MixtureIntervalMarkovDecisionProcess.jl | 88 ++++++++++++++----- ...OrthogonalIntervalMarkovDecisionProcess.jl | 45 +++++----- .../OrthogonalIntervalProbabilities.jl | 6 +- 6 files changed, 110 insertions(+), 62 deletions(-) diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 27e7034..620cfa5 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -6,14 +6,14 @@ 
num_states(s::IntervalMarkovProcess) initial_states(s::IntervalMarkovProcess) AllStates transition_prob(mp::IntervalMarkovProcess) -IntervalMarkovChain IntervalMarkovDecisionProcess +IntervalMarkovChain stateptr(mdp::IntervalMarkovDecisionProcess) -OrthogonalIntervalMarkovChain OrthogonalIntervalMarkovDecisionProcess +OrthogonalIntervalMarkovChain stateptr(mdp::OrthogonalIntervalMarkovDecisionProcess) -MixtureIntervalMarkovChain MixtureIntervalMarkovDecisionProcess +MixtureIntervalMarkovChain stateptr(mdp::MixtureIntervalMarkovDecisionProcess) ``` @@ -47,7 +47,7 @@ gap(p::OrthogonalIntervalProbabilities, l, i, j) sum_lower(p::OrthogonalIntervalProbabilities, l) sum_lower(p::OrthogonalIntervalProbabilities, l, j) num_source(p::OrthogonalIntervalProbabilities) -num_target(p::OrthogonalIntervalProbabilities{1}) +num_target(p::OrthogonalIntervalProbabilities) axes_source(p::OrthogonalIntervalProbabilities) ``` diff --git a/docs/src/theory.md b/docs/src/theory.md index 9a37641..8ab1484 100644 --- a/docs/src/theory.md +++ b/docs/src/theory.md @@ -13,7 +13,7 @@ where ``s = (s_1, \ldots, s_n)\in S``. We will denote the product ambiguity set ## IMDPs Interval Markov Decision Processes (IMDPs), also called bounded-parameter MDPs [1], are a generalization of MDPs, where the transition probabilities, given source state and action, are not known exactly, but they are constrained to be in some probability interval. -Formally, an IMDP ``M`` is a tuple ``M = (S, S_0`, A, \overline{P}, \underline{P})``, where +Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \Gamma)``, where - ``S`` is a finite set of states, - ``S_0 \subseteq S`` is a set of initial states, @@ -38,14 +38,14 @@ Paths, strategies, and adversaries are defined similarly to IMDPs. See [3] for m ## Mixtures of OD-IMDPs Mixtures of OD-IMDPs are included to address the issue the OD-IMDPs may not be able to represent all uncertainty in the transition probabilities.
The mixture model is a convex combination of OD-IMDPs, where each OD-IMDP has its own set of ambiguity sets. Furthermore, the weights of the mixture are also interval-valued. -Formally, a mixture of OD-IMDPs ``M`` with ``K`` OD-IMDPs and ``n`` marginals is a tuple ``M = (S, S_0, A, \Gamma, \Gamma_\alpha)``, where +Formally, a mixture of OD-IMDPs ``M`` with ``K`` OD-IMDPs and ``n`` marginals is a tuple ``M = (S, S_0, A, \Gamma, \Gamma^\alpha)``, where - ``S = S_1 \times \cdots \times S_n`` is a finite set of joint states with ``S_i`` being a finite set of states for the ``i``-th marginal, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, - ``\Gamma = \{\Gamma_{r,s,a}\}_{r \in K, s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)`` and OD-IMDP ``R``, where each ``\Gamma_{r,s,a} = \bigotimes_{i=1}^n \Gamma^i_{r,s,a}`` with ``\Gamma^i_{r,s,a}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``. -- ``\Gamma_\alpha`` is a set of interval ambiguity sets for the weights of the mixture, i.e. over ``\{1, \ldots, K\}``. +- ``\Gamma^\alpha = \{\Gamma^\alpha_{s,a}\}_{s \in S, a \in A}`` is a set of interval ambiguity sets for the weights of the mixture, i.e. over ``\{1, \ldots, K\}``. -A feasible distribution for a mixture of OD-IMDPs is ``\sum_{r \in K} \alpha_r \prod_{i = 1}^n \gamma_{r,s,a}`` where ``\alpha \in \Gamma_\alpha`` and ``\gamma_{r,s,a} \in \Gamma_{r,s,a}``. See [3] for more details on mixtures of OD-IMDPs. +A feasible distribution for a mixture of OD-IMDPs is ``\sum_{r \in K} \alpha_{s,a}(r) \prod_{i = 1}^n \gamma_{r,s,a}`` where ``\alpha_{s,a} \in \Gamma^\alpha_{s,a}`` and ``\gamma_{r,s,a} \in \Gamma_{r,s,a}`` for each source-action pair ``(s, a)``. See [3] for more details on mixtures of OD-IMDPs. 
### Reachability In this formal framework, we can describe computing reachability given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` as the following objective diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index 69d4a54..30b6865 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -8,12 +8,17 @@ A type representing (stationary) Interval Markov Decision Processes (IMDP), which are Markov Decision Processes with uncertainty in the form of intervals on the transition probabilities. -Formally, let ``(S, S_0, A, \\bar{P}, \\underbar{P})`` be an interval Markov decision processes, where ``S`` is the set of states, ``S_0 \\subset S`` is the set of initial states, -``A`` is the set of actions, and ``\\bar{P} : A \\to \\mathbb{R}^{|S| \\times |S|}`` and ``\\underbar{P} : A \\to \\mathbb{R}^{|S| \\times |S|}`` are functions -representing the upper and lower bound transition probability matrices prespectively for each action. Then the ```IntervalMarkovDecisionProcess``` type is -defined as follows: indices `1:num_states` are the states in ``S``, `transition_prob` represents ``\\bar{P}`` and ``\\underbar{P}``, actions are -implicitly defined by `stateptr` (e.g. if `stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state 3 are `[4, 5, 6]`), -and `initial_states` is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S``. +Formally, let ``(S, S_0, A, \\Gamma)`` be an interval Markov decision process, where +- ``S`` is the set of states, +- ``S_0 \\subseteq S`` is the set of initial states, +- ``A`` is the set of actions, and +- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S, a \\in A}`` is a set of interval ambiguity sets on the transition probabilities, for each source-action pair. 
+ +Then the ```IntervalMarkovDecisionProcess``` type is defined as follows: indices `1:num_states` are the states in ``S``, +`transition_prob` represents ``\\Gamma``, actions are implicitly defined by `stateptr` (e.g. if `stateptr[3] == 4` and `stateptr[4] == 7` then +the actions available to state 3 are `[1, 2, 3]`), and `initial_states` is the set of initial states ``S_0``. If no initial states are specified, +then the initial states are assumed to be all states in ``S`` represented by `AllStates`. See [IntervalProbabilities](@ref) and [Theory](@ref) for more information +on the structure of the transition probability ambiguity sets. ### Fields - `transition_prob::P`: interval on transition probabilities where columns represent source/action pairs and rows represent target states. diff --git a/src/models/MixtureIntervalMarkovDecisionProcess.jl b/src/models/MixtureIntervalMarkovDecisionProcess.jl index d7c9805..1d690bf 100644 --- a/src/models/MixtureIntervalMarkovDecisionProcess.jl +++ b/src/models/MixtureIntervalMarkovDecisionProcess.jl @@ -8,37 +8,76 @@ A type representing (stationary) Mixture Interval Markov Decision Processes (OIMDP), which are IMDPs where the transition probabilities for each state can be represented as the product of the transition probabilities of individual processes. -# TODO: Update theory section - -Formally, let ``(S, S_0, A, \\bar{P}, \\underbar{P})`` be an interval Markov decision processes, where ``S`` is the set of states, ``S_0 \\subset S`` is the set of initial states, -``A`` is the set of actions, and ``\\bar{P} : A \\to \\mathbb{R}^{|S| \\times |S|}`` and ``\\underbar{P} : A \\to \\mathbb{R}^{|S| \\times |S|}`` are functions -representing the upper and lower bound transition probability matrices prespectively for each action. 
Then the ```IntervalMarkovDecisionProcess``` type is -defined as follows: indices `1:num_states` are the states in ``S``, `transition_prob` represents ``\\bar{P}`` and ``\\underbar{P}``, actions are -implicitly defined by `stateptr` (e.g. if `stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state 3 are `[4, 5, 6]`), -and `initial_states` is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S``. +Formally, let ``(S, S_0, A, \\Gamma, \\Gamma^\\alpha)`` be a mixture interval Markov decision process, where +- ``S = S_1 \\times \\cdots \\times S_n`` is the set of joint states with ``S_i`` the set of states for the ``i``-th marginal, +- ``S_0 \\subseteq S`` is the set of initial states, +- ``A`` is the set of actions, +- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S, a \\in A}`` is a set of interval ambiguity sets on the transition probabilities, + for each source-action pair, with ``\\Gamma_{s,a} = \\bigotimes_{i=1}^n \\Gamma_{s,a}^i`` and ``\\Gamma_{s,a}^i`` is a marginal interval ambiguity set + on the ``i``-th marginal, and +- ``\\Gamma^\\alpha = \\{\\Gamma^\\alpha_{s,a}\\}_{s \\in S, a \\in A}`` is the interval ambiguity set for the mixture. + +Then the ```MixtureIntervalMarkovDecisionProcess``` type is defined as follows: indices `1:num_states` are the states in ``S`` and +`transition_prob` represents ``\\Gamma`` and ``\\Gamma^\\alpha``. Actions are implicitly defined by `stateptr` (e.g. if `source_dims` in `transition_prob` +is `(2, 3, 2)`, and `stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state `CartesianIndex(1, 2, 1)` are `[1, 2, 3]`), and `initial_states` +is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S`` +represented by `AllStates`.
See [MixtureIntervalProbabilities](@ref) and [Theory](@ref) for more information on the structure +of the transition probability ambiguity sets. ### Fields -# TODO: Update fields - -- `transition_prob::P`: interval on transition probabilities where columns represent source/action pairs and rows represent target states. -- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[:, stateptr[j]:stateptr[j + 1] - 1]` is the transition - probability matrix for source state `j`) in the style of colptr for sparse matrices in CSC format. +- `transition_prob::P`: ambiguity set on transition probabilities (see [MixtureIntervalProbabilities](@ref) for the structure). +- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[k][l][:, stateptr[j]:stateptr[j + 1] - 1]` is the transition + probability matrix for source state `j` for each model `k` and axis `l`) in the style of colptr for sparse matrices in CSC format. - `initial_states::VI`: initial states. - `num_states::Int32`: number of states. ### Examples - -# TODO: Update examples - -```jldoctest -``` - -There is also a constructor for `MixtureIntervalMarkovDecisionProcess` where the transition probabilities are given as a list of -transition probabilities for each source state. +The following example is a simple mixture of two `OrthogonalIntervalProbabilities` with one dimension and the same source/action pairs. +The first state has two actions and the second state has one action. The weighting ambiguity set is also specified for the same three source-action pairs. 
```jldoctest +prob1 = OrthogonalIntervalProbabilities( + ( + IntervalProbabilities(; + lower = [ + 0.0 0.5 0.1 + 0.1 0.3 0.2 + ], + upper = [ + 0.5 0.7 0.6 + 0.7 0.4 0.8 + ], + ), + ), + (Int32(2),), +) +prob2 = OrthogonalIntervalProbabilities( + ( + IntervalProbabilities(; + lower = [ + 0.1 0.4 0.2 + 0.3 0.0 0.1 + ], + upper = [ + 0.4 0.6 0.5 + 0.7 0.5 0.7 + ], + ), + ), + (Int32(2),), +) +weighting_probs = IntervalProbabilities(; lower = [ + 0.3 0.5 0.4 + 0.4 0.3 0.2 +], upper = [ + 0.8 0.7 0.7 + 0.7 0.5 0.4 +]) +mixture_prob = MixtureIntervalProbabilities((prob1, prob2), weighting_probs) + +stateptr = Int32[1, 3, 4] +mdp = MixtureIntervalMarkovDecisionProcess(mixture_prob, stateptr) ``` - """ struct MixtureIntervalMarkovDecisionProcess{ P <: MixtureIntervalProbabilities, @@ -82,7 +121,7 @@ end Construct a Mixture Interval Markov Chain from mixture interval transition probabilities. The initial states are optional and if not specified, all states are assumed to be initial states. The number of states is inferred from the size of the transition probability matrix. -The returned type is an `OrthogonalIntervalMarkovDecisionProcess` with only one action per state (i.e. `stateptr[j + 1] - stateptr[j] == 1` for all `j`). +The returned type is an `MixtureIntervalMarkovDecisionProcess` with only one action per state (i.e. `stateptr[j + 1] - stateptr[j] == 1` for all `j`). This is done to unify the interface for value iteration. """ function MixtureIntervalMarkovChain( @@ -117,7 +156,8 @@ end Return the state pointer of the Interval Markov Decision Process. The state pointer is a vector of integers where the `i`-th element is the index of the first element of the `i`-th state in the transition probability matrix. -I.e. `transition_prob[:, stateptr[j]:stateptr[j + 1] - 1]` is the transition probability matrix for source state `j`. +I.e. 
`mixture_probs(transition_prob)[k][l][:, stateptr[j]:stateptr[j + 1] - 1]` is the independent transition probability matrix for (flattened) source state `j` +for axis `l` and model `k`, and `weighting_probs(transition_prob)[:, stateptr[j]:stateptr[j + 1] - 1]` is the weighting matrix for `j`. """ stateptr(mdp::MixtureIntervalMarkovDecisionProcess) = mdp.stateptr diff --git a/src/models/OrthogonalIntervalMarkovDecisionProcess.jl b/src/models/OrthogonalIntervalMarkovDecisionProcess.jl index 4c1356e..caf9231 100644 --- a/src/models/OrthogonalIntervalMarkovDecisionProcess.jl +++ b/src/models/OrthogonalIntervalMarkovDecisionProcess.jl @@ -8,35 +8,40 @@ A type representing (stationary) Orthogonal Interval Markov Decision Processes (OIMDP), which are IMDPs where the transition probabilities for each state can be represented as the product of the transition probabilities of individual processes. -# TODO: Update theory section - -Formally, let ``(S, S_0, A, \\bar{P}, \\underbar{P})`` be an interval Markov decision processes, where ``S`` is the set of states, ``S_0 \\subset S`` is the set of initial states, -``A`` is the set of actions, and ``\\bar{P} : A \\to \\mathbb{R}^{|S| \\times |S|}`` and ``\\underbar{P} : A \\to \\mathbb{R}^{|S| \\times |S|}`` are functions -representing the upper and lower bound transition probability matrices prespectively for each action. Then the ```IntervalMarkovDecisionProcess``` type is -defined as follows: indices `1:num_states` are the states in ``S``, `transition_prob` represents ``\\bar{P}`` and ``\\underbar{P}``, actions are -implicitly defined by `stateptr` (e.g. if `stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state 3 are `[4, 5, 6]`), -and `initial_states` is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S``.
+Formally, let ``(S, S_0, A, \\Gamma)`` be an orthogonal interval Markov decision process [1], where +- ``S = S_1 \\times \\cdots \\times S_n`` is the set of joint states with ``S_i`` the set of states for the ``i``-th marginal, +- ``S_0 \\subseteq S`` is the set of initial states, +- ``A`` is the set of actions, and +- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S, a \\in A}`` is a set of interval ambiguity sets on the transition probabilities, + for each source-action pair, with ``\\Gamma_{s,a} = \\bigotimes_{i=1}^n \\Gamma_{s,a}^i`` and ``\\Gamma_{s,a}^i`` is a marginal interval ambiguity set + on the ``i``-th marginal. + +Then the ```OrthogonalIntervalMarkovDecisionProcess``` type is defined as follows: indices `1:num_states` are the states in ``S`` and +`transition_prob` represents ``\\Gamma``. Actions are implicitly defined by `stateptr` (e.g. if `source_dims` in `transition_prob` is `(2, 3, 2)`, and +`stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state `CartesianIndex(1, 2, 1)` are `[1, 2, 3]`), and `initial_states` +is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S`` +represented by `AllStates`. See [OrthogonalIntervalProbabilities](@ref) and [Theory](@ref) for more information on the structure +of the transition probability ambiguity sets. ### Fields -# TODO: Update fields - -- `transition_prob::P`: interval on transition probabilities where columns represent source/action pairs and rows represent target states. -- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[:, stateptr[j]:stateptr[j + 1] - 1]` is the transition - probability matrix for source state `j`) in the style of colptr for sparse matrices in CSC format. +- `transition_prob::P`: interval on transition probabilities where columns represent source/action pairs and rows represent target states along each marginal.
+- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[l][:, stateptr[j]:stateptr[j + 1] - 1]` is the transition + probability matrix for source state `j` for each axis `l`) in the style of colptr for sparse matrices in CSC format. - `initial_states::VI`: initial states. - `num_states::Int32`: number of states. ### Examples +Assume that `prob1`, `prob2`, and `prob3` are `IntervalProbabilities` for the first, second, and third axis, respectively, defined as the example +in [OrthogonalIntervalProbabilities](@ref). Then the following code constructs an `OrthogonalIntervalMarkovDecisionProcess` with three axes of three states each. +The number of actions per state is one, i.e. the model is a Markov chain. Therefore, the `stateptr` is a unit range `1:num_states + 1` and we can call +the convenience constructor `OrthogonalIntervalMarkovChain` instead. -# TODO: Update example ```jldoctest +prob = OrthogonalIntervalProbabilities((prob1, prob2, prob3), (Int32(3), Int32(3), Int32(3))) +mc = OrthogonalIntervalMarkovChain(prob) ``` -There is also a constructor for `OrthogonalIntervalMarkovDecisionProcess` where the transition probabilities are given as a list of -transition probabilities for each source state. - -```jldoctest -``` +[1] Mathiesen, F. B., Haesaert, S., & Laurenti, L. (2024). Scalable control synthesis for stochastic systems via structural IMDP abstractions. arXiv preprint arXiv:2411.11803. """ struct OrthogonalIntervalMarkovDecisionProcess{ @@ -127,7 +132,7 @@ end Return the state pointer of the Interval Markov Decision Process. The state pointer is a vector of integers where the `i`-th element is the index of the first element of the `i`-th state in the transition probability matrix. -I.e. `transition_prob[:, stateptr[j]:stateptr[j + 1] - 1]` is the transition probability matrix for source state `j`. +I.e. 
`transition_prob[l][:, stateptr[j]:stateptr[j + 1] - 1]` is the transition probability matrix for (flattened) source state `j` for axis `l`. """ stateptr(mdp::OrthogonalIntervalMarkovDecisionProcess) = mdp.stateptr diff --git a/src/probabilities/OrthogonalIntervalProbabilities.jl b/src/probabilities/OrthogonalIntervalProbabilities.jl index 0b864a5..4a85f1a 100644 --- a/src/probabilities/OrthogonalIntervalProbabilities.jl +++ b/src/probabilities/OrthogonalIntervalProbabilities.jl @@ -6,11 +6,9 @@ with target states/marginals on the rows and source states or source/action pair a column-major order, i.e., the first axis of source states is the fastest, similar to the ordering of a multi-dimensional array in Julia. E.g. for an `OrthogonalIntervalProbabilities` with `source_dims == (3, 3, 3)` and 2 actions for each source state ``\\{a_1, a_2\\}``, the columns in order represent the collowing: - ```math ((1, 1, 1), a_1), ((1, 1, 1), a_2), (2, 1, 1), a_1), ((2, 1, 1), a_2), ..., ((3, 3, 3), a_1), ((3, 3, 3), a_2). ``` - The number of target states correspond to the number of rows in the transition probabilities of each axis. @@ -175,8 +173,8 @@ axes_source(p::OrthogonalIntervalProbabilities) = axes_source(first(p.probs)) Return the number of target states along each marginal. 
""" -num_target(p::OrthogonalIntervalProbabilities{N}) where {N} = - ntuple(i -> num_target(p[i]), N) +num_target(p::OrthogonalIntervalProbabilities) = + ntuple(i -> num_target(p[i]), ndims(p)) stateptr(p::OrthogonalIntervalProbabilities) = UnitRange{Int32}(1, num_source(p) + 1) Base.ndims(p::OrthogonalIntervalProbabilities{N}) where {N} = N From c09fd227601b3127a5fddd0c324a0e8850766e16 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 4 Dec 2024 17:16:57 +0100 Subject: [PATCH 3/3] Format --- src/probabilities/OrthogonalIntervalProbabilities.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/probabilities/OrthogonalIntervalProbabilities.jl b/src/probabilities/OrthogonalIntervalProbabilities.jl index 4a85f1a..59c405f 100644 --- a/src/probabilities/OrthogonalIntervalProbabilities.jl +++ b/src/probabilities/OrthogonalIntervalProbabilities.jl @@ -173,9 +173,8 @@ axes_source(p::OrthogonalIntervalProbabilities) = axes_source(first(p.probs)) Return the number of target states along each marginal. """ -num_target(p::OrthogonalIntervalProbabilities) = - ntuple(i -> num_target(p[i]), ndims(p)) - +num_target(p::OrthogonalIntervalProbabilities) = ntuple(i -> num_target(p[i]), ndims(p)) + stateptr(p::OrthogonalIntervalProbabilities) = UnitRange{Int32}(1, num_source(p) + 1) Base.ndims(p::OrthogonalIntervalProbabilities{N}) where {N} = N