diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 277601a..4cf3dc9 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -4,7 +4,7 @@ whitespace_typedefs = true
whitespace_ops_in_indices = true
whitespace_in_kwargs = true
remove_extra_newlines = true
-always_use_return = true
+always_use_return = false
indent_submodule = true
separate_kwargs_with_semicolon = true
surround_whereop_typeparameters = true
diff --git a/docs/make.jl b/docs/make.jl
index 83e5bb0..81941f4 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,11 +1,11 @@
-using IntervalMDP
+using IntervalMDP, IntervalMDP.Data
using Documenter
push!(LOAD_PATH, "../src/")
DocMeta.setdocmeta!(IntervalMDP, :DocTestSetup, :(using IntervalMDP); recursive = true)
-makedocs(;
-    modules = [IntervalMDP],
+makedocs(;
+    modules = [IntervalMDP, IntervalMDP.Data],
    authors = "Frederik Baymler Mathiesen and contributors",
    sitename = "IntervalMDP.jl",
    format = Documenter.HTML(;
@@ -18,6 +18,7 @@ makedocs(;
        "Home" => "index.md",
        "Usage" => "usage.md",
        "Data formats" => "data.md",
+        "Theory" => "theory.md",
        "Algorithms" => "algorithms.md",
        "Reference" => Any[
            "Systems" => "reference/systems.md",
diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md
index bc03301..92bb879 100644
--- a/docs/src/algorithms.md
+++ b/docs/src/algorithms.md
@@ -1 +1,151 @@
-# Algorithms
\ No newline at end of file
+# Algorithms

To simplify the discussion of the algorithmic choices, we will assume that the goal is to compute the maximizing pessimistic probability of reaching a set of states ``G``, that is,

```math
\max_{\pi} \; \min_{\eta} \; \mathbb{P}_{\pi,\eta}\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right].
```

See [Theory](@ref) for more details on the theory behind IMDPs, including strategies and adversaries; in this case, these correspond to the maximization and minimization operators respectively. The algorithms are easily adapted to other specifications, such as minimizing optimistic probability, which is useful for safety, or maximizing pessimistic discounted reward. Assume furthermore that the transition probabilities are represented as a sparse matrix.
This is the most common representation for large models; the algorithms are easily adapted to dense matrices, where the sorting (see [Sorting](@ref)) is shared across states, so parallelizing it has a smaller impact on performance.

## Solving reachability as value iteration
Computing the solution to the above problem can be reframed in terms of value iteration. The value function ``V_k`` is the probability of reaching ``G`` in ``k`` steps or fewer. The value function is initialized to ``V_0(s) = 1`` if ``s \in G`` and ``V_0(s) = 0`` otherwise. The value function is then iteratively updated according to the Bellman equation
```math
\begin{aligned}
    V_{0}(s) &= \mathbf{1}_{G}(s) \\
    V_{k}(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S\setminus G}(s) \max_{a \in A} \min_{p_{s,a}\in \Gamma_{s,a}} \sum_{s' \in S} V_{k-1}(s') p_{s,a}(s'),
\end{aligned}
```
where ``\mathbf{1}_{G}`` is the indicator function for the set ``G``, i.e. ``\mathbf{1}_{G}(s) = 1`` if ``s \in G`` and ``0`` otherwise. This Bellman update is repeated until ``k = K``, or, if ``K = \infty``, until the value function converges, i.e. ``V_k = V_{k-1}`` for some ``k``. The value function is then the solution to the problem.
Exact convergence is virtually impossible to achieve in a finite number of iterations due to the finite precision of floating point numbers. Hence, we instead use a residual tolerance ``\epsilon`` and stop when the Bellman residual ``V_k - V_{k-1}`` is less than the threshold, i.e. ``\|V_k - V_{k-1}\|_\infty < \epsilon``.
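As a minimal sketch (our illustration, not the library's actual implementation), such a stopping criterion could be written as follows, assuming the previous iterate `V_prev` is kept alongside the current iterate `V`; the tolerance keyword `ϵ` is hypothetical:

```julia
# Sup-norm Bellman residual check: stop when no entry changed by more than ϵ.
converged(V, V_prev; ϵ = 1e-6) = maximum(abs.(V .- V_prev)) < ϵ
```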
In a more programmatic formulation, the algorithm (for ``K = \infty``) can be summarized as follows:

```julia
function value_iteration(system, spec)
    V = initialize_value_function(spec)

    while !converged(V)
        V = bellman_update(V, system)
    end
end
```

## Efficient value iteration

Computing the Bellman update can be done independently for each state.
```julia
function bellman_update(V, system)
    # Threads.@threads parallelizes across available threads
    Threads.@threads for s in states(system)
        # Minimize over probability distributions in `Gamma_{s,a}`, i.e. pessimistic
        V_state = minimize_feasible_dist(V, system, s)

        # Maximize over actions
        V[s] = maximum(V_state)
    end
end
```

For each state, we need to compute the minimum over all feasible distributions for each state-action pair, and the maximum over all actions.
The minimum over all feasible distributions can be computed as the solution to a Linear Programming (LP) problem, namely

```math
    \begin{aligned}
    \min_{p_{s,a}} \quad & \sum_{s' \in S} V_{k-1}(s') \cdot p_{s,a}(s'), \\
    \text{s.t.} \quad & \underline{P}(s,a,s') \leq p_{s,a}(s') \leq \overline{P}(s,a,s') \quad \forall s' \in S, \\
    \quad & \sum_{s' \in S} p_{s,a}(s') = 1. \\
    \end{aligned}
```

However, due to the particular structure of the LP problem, we can use a more efficient algorithm: O-maximization, or ordering-maximization [1].
In the case of pessimistic probability, we want to assign as much probability mass as possible to the destinations with the smallest values of ``V_{k-1}``, while keeping the probability distribution feasible, i.e. within the probability bounds and summing to 1. This is done by sorting the values of ``V_{k-1}`` and then assigning the state with the smallest value its upper bound, then the second smallest, and so on, until the remaining mass must be assigned to the lower bounds of the remaining states for the distribution to be feasible.
```julia
function minimize_feasible_dist(V, system, s)
    # Sort values of `V` in ascending order
    order = sortperm(V)

    # Initialize distribution to lower bounds
    p = lower_bounds(system, s)
    rem = 1 - sum(p)

    # Assign upper bounds to states with smallest values
    # until the remaining mass is zero
    for idx in order
        gap = upper_bounds(system, s)[idx] - p[idx]
        if rem <= gap
            p[idx] += rem
            break
        else
            p[idx] += gap
            rem -= gap
        end
    end

    return p
end
```

We abstract this algorithm into a sorting phase and an O-maximization phase:
```julia
function minimize_feasible_dist(V, system, s)
    # Sort values of `V` in ascending order
    order = sortstates(V)
    p = o_maximize(system, s, order)
    return p
end
```

When computing the above on a GPU, we can and should parallelize both the sorting and the O-maximization phase.
In the following two sections, we will discuss how to parallelize these phases.

### Sorting
Sorting in parallel on the GPU is a well-studied problem, and there are many algorithms for doing so. We choose bitonic sorting, which is a sorting network that is easily parallelized and implementable on a GPU. The idea is to merge bitonic subsequences, i.e. sequences that first increase and then decrease, of increasingly larger sizes, performing minor rounds of compare-and-swaps to maintain the bitonic property. The figure below shows the 3 major rounds needed to sort a set of 8 elements (each line represents an element, and each arrow is a comparison pointing towards the larger element). The latency[^1] of the sorting network is ``O((\lg n)^2)``, and thus it scales well to larger numbers of elements. See [Wikipedia](https://en.wikipedia.org/wiki/Bitonic_sorter) for more details.

![](assets/bitonic_sorting.svg)
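To make the structure of the network concrete, the following is a serial Julia emulation of the bitonic sorting network (an illustrative sketch, not the CUDA kernel used by IntervalMDP.jl). Every compare-and-swap within one `j`-pass touches disjoint pairs, so on a GPU an entire pass can execute in parallel:

```julia
# Sort a vector whose length is a power of two with a bitonic sorting network.
function bitonic_sort!(v::AbstractVector)
    n = length(v)
    @assert ispow2(n)

    k = 2                       # size of the bitonic subsequences (major rounds)
    while k <= n
        j = k ÷ 2               # comparison distance (minor rounds)
        while j >= 1
            for i in 0:(n - 1)  # on a GPU: one thread per index i
                l = xor(i, j)   # partner index in this pass
                if l > i
                    # Direction depends on which k-block the index lies in
                    ascending = (i & k) == 0
                    if (v[i + 1] > v[l + 1]) == ascending
                        v[i + 1], v[l + 1] = v[l + 1], v[i + 1]
                    end
                end
            end
            j ÷= 2
        end
        k *= 2
    end
    return v
end

bitonic_sort!([5, 3, 8, 1, 7, 2, 6, 4])  # -> [1, 2, 3, 4, 5, 6, 7, 8]
```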
### O-maximization
To parallelize the O-maximization phase, observe that O-maximization implicitly computes a cumulative sum of the gaps according to the ordering, and that this is the only dependency between the states. Hence, if we can parallelize this cumulative sum, then we can parallelize the O-maximization phase.
Luckily, there is a well-studied algorithm for computing the cumulative sum in parallel: tree reduction for prefix scan. The idea is best explained with the figure below.

![](assets/tree_reduction_prefix_scan.svg)

Here, we recursively compute the cumulative sum of larger and larger subsets of the array. The latency is ``O(\lg n)``, which is very efficient. See [Wikipedia](https://en.wikipedia.org/wiki/Prefix_sum) for more details. When implementing the tree reduction on a GPU, it is possible to use warp shuffles to very efficiently perform tree reductions of up to 32 elements. For larger sets, shared memory is used to store the intermediate results, which is much faster than global memory. See [CUDA Programming Model](@ref) for more details on why these choices are important.

Putting it all together, we get the following (pseudo-code) algorithm for O-maximization:
```julia
function o_maximize(system, s, order)
    p = lower_bounds(system, s)
    rem = 1 - sum(p)
    gap = upper_bounds(system, s) - p

    # Ordered cumulative sum of gaps
    cumgap = cumulative_sum(gap[order])

    @parallelize for (i, o) in enumerate(order)
        # Mass remaining after all earlier states in the ordering take their gap;
        # zero once the budget is exhausted, so later states add nothing.
        rem_state = max(rem - cumgap[i] + gap[o], 0)
        if gap[o] < rem_state
            p[o] += gap[o]
        else
            p[o] += rem_state
        end
    end

    return p
end
```
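The `cumulative_sum` used above can be emulated with the following recursive-doubling (Hillis-Steele) scan, again an illustrative sketch rather than the actual CUDA implementation. Each pass of the `while` loop corresponds to one parallel step, since all updates within a pass are independent:

```julia
# Inclusive prefix sum with O(lg n) parallel steps (recursive doubling).
function parallel_prefix_sum(x::AbstractVector)
    y = copy(x)
    offset = 1
    while offset < length(y)
        # All elements in this pass can be updated concurrently on a GPU
        y = [i > offset ? y[i] + y[i - offset] : y[i] for i in eachindex(y)]
        offset *= 2
    end
    return y
end

parallel_prefix_sum([1, 2, 3, 4])  # -> [1, 3, 6, 10] in 2 passes
```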
## CUDA Programming Model
Here we give a brief introduction to the CUDA programming model in order to understand the algorithmic choices. For a more in-depth introduction, see the [CUDA C++ Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html). The CUDA framework is a Single-Instruction Multiple-Thread (SIMT) parallel execution platform and Application Programming Interface (API). This is in contrast to Single-Instruction Multiple-Data (SIMD), where all data must be processed homogeneously without control flow. SIMT makes CUDA more flexible for heterogeneous processing and control flow. The smallest execution unit in CUDA is a thread, which is a sequential processing of instructions. A thread is uniquely identified by its thread index, which allows indexing into the global data for parallel processing. A group of 32 threads[^2] is called a warp, which is executed _mostly_ synchronously on a streaming multiprocessor. If control flow makes threads in a warp diverge, instructions may need to be decoded twice and executed in two separate cycles. Due to this synchronous behavior, data can be shared in registers between threads in a warp for maximum performance. A collection of (up to) 1024 threads is called a block, and this is the largest aggregation of threads that can be synchronized. Furthermore, threads in a block share the appropriately named shared memory, which is memory stored locally on the streaming multiprocessor for fast access. Note that, perhaps unintuitively, shared memory is faster than local memory (not to be confused with registers), as local memory is allocated in device memory. Finally, a collection of (up to) 65536 blocks is called the grid of a kernel, where a kernel is the set of instructions to be executed. The grid is singular, as only a single grid exists per launched kernel. Hence, if more blocks are necessary to process the amount of data, then a grid-strided loop or multiple kernels must be used.

![](assets/cuda_programming_model.svg)


[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883.

[^1]: Note that when assessing parallel algorithms, the asymptotic performance is measured by the latency, i.e. the number of sequential parallel steps before the result is available. This is in contrast to traditional algorithms, which are assessed by the total number of operations.

[^2]: With consecutive thread indices aligned to a multiple of 32.
\ No newline at end of file
diff --git a/docs/src/assets/bitonic_sorting.svg b/docs/src/assets/bitonic_sorting.svg
new file mode 100644
index 0000000..f8af3f4
--- /dev/null
+++ b/docs/src/assets/bitonic_sorting.svg
@@ -0,0 +1,84 @@
(SVG markup omitted)
diff --git a/docs/src/assets/cuda_programming_model.svg b/docs/src/assets/cuda_programming_model.svg
new file mode 100644
index 0000000..6b1ca15
--- /dev/null
+++ b/docs/src/assets/cuda_programming_model.svg
@@ -0,0 +1,354 @@
(SVG markup omitted)
diff --git a/docs/src/assets/tree_reduction_prefix_scan.svg b/docs/src/assets/tree_reduction_prefix_scan.svg
new file mode 100644
index 0000000..d314495
--- /dev/null
+++ b/docs/src/assets/tree_reduction_prefix_scan.svg
@@ -0,0 +1,391 @@
(SVG markup omitted)
diff --git a/docs/src/data.md b/docs/src/data.md
index e2a327c..761e401 100644
--- a/docs/src/data.md
+++ b/docs/src/data.md
@@ -1,10 +1,147 @@
# Data storage formats
-
-- ASCII seems to dominate (but is very inefficient)
-- Binary formats are more efficient but not standardized
IntervalMDP.jl supports reading and writing data in various formats, namely the PRISM explicit format, the bmdp-tool format, and our own format (model and specification).
To justify introducing another standard ([see relevant XKCD](https://xkcd.com/927/)), note that the PRISM explicit format and the bmdp-tool format are both written in ASCII, which is very inefficient in terms of storage space (especially for storing floating point numbers) and parsing time. We propose a binary format for the most storage-intensive part of the data, namely the transition probabilities, and use JSON for the specification, which is human- and machine-readable and widely used.

## PRISM
IntervalMDP.jl supports reading and writing the [PRISM](https://www.prismmodelchecker.org/) [1] explicit data format.
The data format is split into 4 different files: one for the states, one for the labels, one for the transition probabilities, and one for the specification. Therefore, our interface for reading PRISM files takes the path without file ending and adds the appropriate ending to each of the four files.

```julia
# Read
problem = read_prism_file(path_without_file_ending)

# Write
write_prism_file(path_without_file_ending, problem)
```

The problem structure contains both the IMDP and the specification, including whether to synthesize a maximizing or minimizing strategy and whether to use an optimistic or pessimistic adversary.

PRISM uses 4 different ASCII-encoded files to store the explicit representation of the system: '.sta' (states), '.lab' (labels), '.tra' (transitions), and '.pctl' (property). In the tables below, we list the format of each file.
The extended details of the PRISM explicit file format can be found in [the appendix of the PRISM manual](https://www.prismmodelchecker.org/manual/Appendices/ExplicitModelFiles).

#### States .sta
| Number of lines | Description |
|:----------------|:--------------------------------------------------------------------------------------------------------------|
| First line | A tuple `(v1, v2, ..., vn)` listing the ``n`` variables in the model. |
| ``m`` lines where ``m`` is the number of states | Each line contains `i:(v1, v2, ..., vn)` where `i` is the index of the state and `(v1, v2, ..., vn)` is an assignment of values to the variables in the model. Indices are zero-indexed. |

#### Labels .lab
| Number of lines | Description |
|:----------------|:--------------------------------------------------------------------------------------------------------------|
| First line | Contains a space-separated list of labels with index `i="label"`. The first two must be `0="init" 1="deadlock"`. |
| All remaining lines | Contains `i: j1 j2 j3 ...` where `i` is a state index and `j1 j2 j3 ...` are space-separated indices of labels associated with state `i`. |

#### Transitions .tra
| Number of lines | Description |
|:----------------|:--------------------------------------------------------------------------------------------------------------|
| First line | `num_states num_choices num_transitions` where `num_states` must match the number in the state file. |
| Following `num_transitions` lines | A list of transition probabilities with the format `src_idx act_idx dest_idx [p_lower,p_upper] action`. |
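For illustration, a hypothetical `.tra` file for a two-state, one-action IMDP (with made-up probability bounds) could look as follows:

```
2 2 4
0 0 0 [0.1,0.5] a
0 0 1 [0.5,0.9] a
1 0 0 [0.2,0.6] a
1 0 1 [0.4,0.8] a
```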
#### Property .pctl
| Number of lines | Description |
|:----------------|:--------------------------------------------------------------------------------------------------------------|
| First line | PRISM property specification |

## bmdp-tool
The [bmdp-tool](https://github.com/aria-systems-group/bmdp-tool) data format is similar to the PRISM explicit-format transition probability file, where transition probabilities are stored line-by-line with source, action, destination, and probability bounds in ASCII. Key differences include no explicit listing of states, the fact that it only supports reachability properties, and that terminal states are listed directly in the transition probability file. As a result, the bmdp-tool data format is a single file. This format lacks information about whether the reachability is finite or infinite time, and hence the reader only returns the set of terminal states.

```julia
# Read
imdp, terminal_states = read_bmdp_tool_file(path)

# Write
write_bmdp_tool_file(path, problem)
```

bmdp-tool uses only one ASCII file with the following format:

| Number of lines | Description |
|:----------------|:--------------------------------------------------------------------------------------------------------------|
| First line | `num_states`. |
| Second line | `num_actions` (not to be confused with `num_choices` of PRISM). |
| Third line | `num_terminal`. |
| The following `num_terminal` lines | Indices (zero-indexed) of terminal states, one per line. |
| All remaining lines | A list of transition probabilities with the format `src_idx act_idx dest_idx p_lower p_upper`. |

!!! terminology "Choices vs actions"

    In PRISM, the number of choices listed in the transition file is the sum over all states of the number of feasible actions in each state. In bmdp-tool, the number of actions is the total number of distinct actions in the model, i.e. in each state, up to `num_actions` may be feasible. This is a subtle difference, but it is important to be aware of, as the parsing in either tool requires the right number to be specified.

## IntervalMDP.jl
IntervalMDP.jl also supports a different _binary_ format based on netCDF to store transition probabilities. We use JSON to store the specification, as storage space for the specification is much less of a concern, and because JSON is a widely used, human-readable file format.

```julia
# Read
imdp = read_imdp_jl_model(model_path)
spec = read_imdp_jl_spec(spec_path)
problem = Problem(imdp, spec)

problem = read_imdp_jl(model_path, spec_path)

# Write
write_imdp_jl_model(model_path, imdp_or_problem)
write_imdp_jl_spec(spec_path, spec_or_problem)
```

The proposed format uses netCDF, which is built on top of HDF5, to store the transition probabilities, and a JSON file to store the specification. Transition probabilities are stored in CSC format, which unfortunately is not natively supported by netCDF, nor by any other widely available format.
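To make the CSC layout concrete, here is a small sketch (the matrix is made up) of how a bound matrix decomposes into the three vectors stored per bound; these are exactly the fields of Julia's `SparseMatrixCSC`:

```julia
using SparseArrays

# Lower transition bounds for a 3-state interval Markov chain:
# column j holds the bounds out of source state j, rows are destinations.
lower = sparse([0.1  0.0  0.5;
                0.3  0.2  0.0;
                0.0  0.7  0.4])

lower.colptr   # column pointers -> stored as `lower_colptr`
lower.rowval   # row indices     -> stored as `lower_rowval`
lower.nzval    # nonzero values  -> stored as `lower_nzval`
```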
Therefore, we store the following attributes and variables in the netCDF file:

__Global attributes:__

- `num_states`
- `model` (either `imc` or `imdp`)
- `format` (assert `sparse_csc`)
- `rows` (assert `to`)
- `cols` (assert `from` if the model is `imc` and `from/action` if the model is `imdp`)

__Variables:__
- `lower_colptr` (integer)
- `lower_rowval` (integer)
- `lower_nzval` (floating point)
- `upper_colptr` (integer)
- `upper_rowval` (integer)
- `upper_nzval` (floating point)
- `initial_states` (integer)
- `stateptr` (integer, only for `imdp`)
- `action_vals` (any netCDF-supported type, only for `imdp`)

We store the specification in a JSON format where the structure depends on the type of specification.
For a reachability-like specification, the specification has the following format:

```json
{
    "property": {
        "type": <"reachability"|"reach-avoid">,
        "infinite_time": <bool>,
        "time_horizon": <int>,
        "eps": <float>,
        "reach": [<state indices>],
        "avoid": [<state indices>]
    },
    "satisfaction_mode": <"pessimistic"|"optimistic">,
    "strategy_mode": <"minimize"|"maximize">
}
```

For a finite horizon property, `eps` is excluded, and similarly, for an infinite horizon property, `time_horizon` is excluded.
For a proper reachability property, the `avoid` field is excluded.

If we instead want to optimize a reward, the format is the following:

```json
{
    "property": {
        "type": "reward",
        "infinite_time": <bool>,
        "time_horizon": <int>,
        "eps": <float>,
        "reward": [<reward for each state>],
        "discount": <float>
    },
    "satisfaction_mode": <"pessimistic"|"optimistic">,
    "strategy_mode": <"minimize"|"maximize">
}
```
-## IntervalMDP.jl
\ No newline at end of file
[1] Kwiatkowska, Marta, Gethin Norman, and David Parker. "PRISM 4.0: Verification of probabilistic real-time systems." Computer Aided Verification: 23rd International Conference, CAV 2011, Snowbird, UT, USA, July 14-20, 2011. Proceedings 23. Springer Berlin Heidelberg, 2011.
\ No newline at end of file
diff --git a/docs/src/theory.md b/docs/src/theory.md
new file mode 100644
index 0000000..8dcf9fa
--- /dev/null
+++ b/docs/src/theory.md
@@ -0,0 +1,34 @@
# Theory
Interval Markov Decision Processes (IMDPs), also called bounded-parameter MDPs [1], are a generalization of MDPs, where the transition probabilities, given a source state and action, are not known exactly but are constrained to lie in some probability interval.
Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \overline{P}, \underline{P})``, where

- ``S`` is a finite set of states,
- ``S_0 \subseteq S`` is a set of initial states,
- ``A`` is a finite set of actions,
- ``\underline{P}: S \times A \times S \to [0,1]`` is a function, where ``\underline{P}(s,a,s')`` defines the lower bound of the transition probability from state ``s\in S`` (source) to state ``s'\in S`` (destination) under action ``a \in A``,
- ``\overline{P}: S \times A \times S \to [0,1]`` is a function, where ``\overline{P}(s,a,s')`` defines the upper bound of the transition probability from state ``s\in S`` to state ``s'\in S`` under action ``a \in A``.

For each state-action pair ``(s,a) \in S \times A``, it holds that ``\sum_{s'\in S} \underline{P}(s,a,s') \leq 1 \leq \sum_{s'\in S} \overline{P}(s,a,s')``, and a transition probability distribution ``p_{s,a}:S\to[0,1]`` is called _feasible_ if ``\underline{P}(s,a,s') \leq p_{s,a}(s') \leq \overline{P}(s,a,s')`` for all destinations ``s'\in S``. The set of all feasible distributions for the state-action pair ``(s,a)`` is denoted by ``\Gamma_{s,a}``.
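As a small, made-up example of feasibility: consider a state-action pair ``(s,a)`` with two successor states and bounds

```math
\underline{P}(s,a,s_1) = 0.1, \quad \overline{P}(s,a,s_1) = 0.6, \qquad \underline{P}(s,a,s_2) = 0.3, \quad \overline{P}(s,a,s_2) = 0.8.
```

Then ``p_{s,a} = (0.5, 0.5)`` is feasible, while ``p_{s,a} = (0.7, 0.3)`` is not, since ``0.7 > \overline{P}(s,a,s_1) = 0.6``.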
A path of an IMDP is a sequence of states and actions ``\omega = (s_0,a_0),(s_1,a_1),\dots``, where ``(s_i,a_i)\in S \times A``. We denote by ``\omega(k) = s_k`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` the set of all paths.
A _strategy_ or _policy_ for an IMDP is a function ``\pi`` that assigns an action to a given state of an IMDP. _Time-dependent_ strategies are functions from state and time step to an action, i.e. ``\pi: S\times \mathbb{N}_0 \to A``. If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state-action pair. Given a strategy and an adversary, an IMDP collapses to a finite Markov chain.

### Reachability
In this formal framework, we can describe computing reachability, given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}``, as the following objective:

```math
{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{P}_{\pi,\eta}\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right],
```

where ``\mathop{opt}^{\pi},\mathop{opt}^{\eta} \in \{\min, \max\}`` and ``\mathbb{P}_{\pi,\eta}`` is the probability measure of the Markov chain induced by the strategy ``\pi`` and the adversary ``\eta``.
When ``\mathop{opt}^{\eta} = \min``, the solution is called the optimal _pessimistic_ probability (or reward), and conversely, it is called the optimal _optimistic_ probability (or reward) when ``\mathop{opt}^{\eta} = \max``.
The choice of min/max for the action and pessimistic/optimistic probability depends on the application.

### Discounted reward
Discounted reward is similar to reachability, but instead of a target set, we have a reward function ``r: S \to \mathbb{R}`` and a discount factor ``\gamma \in (0, 1)``. The objective is then

```math
{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{E}_{\pi,\eta}\left[\sum_{k=0}^{K} \gamma^k r(\omega(k)) \right].
```

[1] Givan, Robert, Sonia Leach, and Thomas Dean. "Bounded-parameter Markov decision processes." Artificial Intelligence 122.1-2 (2000): 71-109.
\ No newline at end of file
diff --git a/docs/src/usage.md b/docs/src/usage.md
index 7467473..4ee8cea 100644
--- a/docs/src/usage.md
+++ b/docs/src/usage.md
@@ -67,15 +67,15 @@ mdp = IntervalMarkovDecisionProcess(transition_probs, initial_states)
```
Note that for an IMDP, the transition probabilities are specified as a list of pairs from actions to transition probabilities for each state.
-The constructor with concatenate the transition probabilities into a single matrix, such that the columns represent source/action pairs and the rows represent target states.
+The constructor will concatenate the transition probabilities into a single matrix, such that the columns represent source/action pairs and the rows represent target states.
It will in addition construct a state pointer `stateptr` pointing to the first column of each state and concatenate a list of actions. See [`IntervalMarkovDecisionProcess`](@ref) for more details on how to construct an IMDP.
-For IMC, it is signifianctly simpler with source states on the columns and target states on the rows of the transition matrices.
+For IMC, the transition probability structure is significantly simpler with source states on the columns and target states on the rows of the transition matrices.

-Next, we choose a specification. Currently, we support reachability, reach-avoid, and reward specifications.
+Next, we choose a specification. Currently, we support reachability, reach-avoid, and reward properties.
For reachability, we specify a target set of states, and for reach-avoid, we specify a target set of states and an avoid set of states.
-Furthermore, we distinguish between finite and infinite horizon specifications.
+Furthermore, we distinguish between finite and infinite horizon properties.
In addition to the property, we need to specify whether we want to maximize or minimize the optimistic or pessimistic satisfaction probability or discounted reward.
```julia
## Properties
diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl
index c294f10..8cc8567 100644
--- a/ext/IntervalMDPCudaExt.jl
+++ b/ext/IntervalMDPCudaExt.jl
@@ -12,7 +12,10 @@ Adapt.@adapt_structure IntervalProbabilities

# Opinionated conversion to GPU with Float64 values and Int32 indices
IntervalMDP.cu(model) = adapt(IntervalMDP.CuModelAdaptor{Float64, Int32}, model)

-function Adapt.adapt_structure(T::Type{<:IntervalMDP.CuModelAdaptor}, mc::IntervalMarkovChain)
+function Adapt.adapt_structure(
+    T::Type{<:IntervalMDP.CuModelAdaptor},
+    mc::IntervalMarkovChain,
+)
    return IntervalMarkovChain(
        adapt(T, transition_prob(mc)),
        adapt(CuArray{IntervalMDP.indtype(T)}, initial_states(mc)),
diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl
index 576c924..a431e5b 100644
--- a/ext/cuda/array.jl
+++ b/ext/cuda/array.jl
@@ -154,5 +154,7 @@ Adapt.adapt_storage(
    M::SparseMatrixCSC,
) where {Tv, Ti} = CuSparseMatrixCSC{Tv, Ti}(M)

-Adapt.adapt_storage(::Type{IntervalMDP.CuModelAdaptor{Tv, Ti}}, x::AbstractArray) where {Tv, Ti} =
-    adapt(CuArray{Tv}, x)
+Adapt.adapt_storage(
+    ::Type{IntervalMDP.CuModelAdaptor{Tv, Ti}},
+    x::AbstractArray,
+) where {Tv, Ti} = adapt(CuArray{Tv}, x)
diff --git a/ext/cuda/interval_probabilities.jl b/ext/cuda/interval_probabilities.jl
index 358b1e9..2c405fc 100644
--- a/ext/cuda/interval_probabilities.jl
+++ b/ext/cuda/interval_probabilities.jl
@@ -1,5 +1,8 @@
-function IntervalMDP.compute_gap(lower::M, upper::M) where {Tv, Ti, M <: CuSparseMatrixCSC{Tv, Ti}}
+function IntervalMDP.compute_gap(
+    lower::M,
+    upper::M,
+) where {Tv, Ti, M <: CuSparseMatrixCSC{Tv, Ti}}
    # lower = CuSparseMatrixCOO(lower)
    # FIXME: This is an ugly, non-robust hack.
diff --git a/src/specification.jl b/src/specification.jl
index b308871..603c42b 100644
--- a/src/specification.jl
+++ b/src/specification.jl
@@ -7,6 +7,14 @@ Super type for all system Property
"""
abstract type Property end

+function checktimehorizon!(prop)
+    @assert time_horizon(prop) > 0
+end
+
+function checkconvergence!(prop)
+    @assert convergence_eps(prop) > 0
+end
+
## Temporal logics

"""
@@ -103,7 +111,7 @@ end

function checkproperty!(prop::FiniteTimeReachability, system::IntervalMarkovProcess)
    checkterminal!(terminal_states(prop), num_states(system))
-    @assert time_horizon(prop) > 0
+    checktimehorizon!(prop)
end

"""
@@ -151,7 +159,7 @@ end

function checkproperty!(prop::InfiniteTimeReachability, system::IntervalMarkovProcess)
    checkterminal!(terminal_states(prop), num_states(system))
-    @assert convergence_eps(prop) > 0
+    checkconvergence!(prop)
end

"""
@@ -210,7 +218,7 @@ end

function checkproperty!(prop::FiniteTimeReachAvoid, system::IntervalMarkovProcess)
    checkterminal!(terminal_states(prop), num_states(system))
    checkdisjoint!(reach(prop), avoid(prop))
-    @assert time_horizon(prop) > 0
+    checktimehorizon!(prop)
end

"""
@@ -264,7 +272,7 @@ end

function checkproperty!(prop::InfiniteTimeReachAvoid, system::IntervalMarkovProcess)
    checkterminal!(terminal_states(prop), num_states(system))
    checkdisjoint!(reach(prop), avoid(prop))
-    @assert convergence_eps(prop) > 0
+    checkconvergence!(prop)
end

"""
@@ -325,6 +333,11 @@ Super type for all reward specifications.
"""
abstract type AbstractReward{R <: Real} <: Property end

+function checkreward!(prop::AbstractReward, system::IntervalMarkovProcess)
+    @assert length(reward(prop)) == num_states(system)
+    @assert 0 < discount(prop) < 1
+end
+
"""
    FiniteTimeReward{R <: Real, T <: Integer, VR <: AbstractVector{R}}

@@ -340,8 +353,8 @@ struct FiniteTimeReward{R <: Real, T <: Integer, VR <: AbstractVector{R}} <:
end

function checkproperty!(prop::FiniteTimeReward, system::IntervalMarkovProcess)
-    @assert length(reward(prop)) == num_states(system)
-    @assert time_horizon(prop) > 0
+    checkreward!(prop, system)
+    checktimehorizon!(prop)
end

"""
@@ -386,8 +399,8 @@ struct InfiniteTimeReward{R <: Real, VR <: AbstractVector{R}} <: AbstractReward{
end

function checkproperty!(prop::InfiniteTimeReward, system::IntervalMarkovProcess)
-    @assert length(reward(prop)) == num_states(system)
-    @assert convergence_eps(prop) > 0
+    checkreward!(prop, system)
+    checkconvergence!(prop)
end

"""