From 96f5f827c1c9363968800de59e399add68664f10 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Fri, 29 Aug 2025 22:56:18 +0200 Subject: [PATCH 01/71] WIP: fIMDP --- docs/src/reference/systems.md | 2 - src/Data/bmdp-tool.jl | 4 +- src/Data/intervalmdp.jl | 2 +- src/Data/prism.jl | 2 +- src/bellman.jl | 4 +- src/cuda.jl | 2 +- .../FactoredRobustMarkovDecisionProcess.jl | 83 ++++++ src/models/IntervalMarkovDecisionProcess.jl | 185 ------------- .../MixtureIntervalMarkovDecisionProcess.jl | 169 ------------ ...OrthogonalIntervalMarkovDecisionProcess.jl | 143 ---------- src/models/models.jl | 13 +- src/probabilities/IntervalAmbiguitySets.jl | 189 ++++++++++++++ src/probabilities/IntervalProbabilities.jl | 244 ------------------ .../MixtureIntervalProbabilities.jl | 167 ------------ .../OrthogonalIntervalProbabilities.jl | 186 ------------- src/probabilities/probabilities.jl | 14 +- src/problem.jl | 4 + src/strategy_cache.jl | 2 +- src/utils.jl | 4 +- src/workspace.jl | 24 +- 20 files changed, 306 insertions(+), 1137 deletions(-) create mode 100644 src/models/FactoredRobustMarkovDecisionProcess.jl delete mode 100644 src/models/IntervalMarkovDecisionProcess.jl delete mode 100644 src/models/MixtureIntervalMarkovDecisionProcess.jl delete mode 100644 src/models/OrthogonalIntervalMarkovDecisionProcess.jl create mode 100644 src/probabilities/IntervalAmbiguitySets.jl delete mode 100644 src/probabilities/IntervalProbabilities.jl delete mode 100644 src/probabilities/MixtureIntervalProbabilities.jl delete mode 100644 src/probabilities/OrthogonalIntervalProbabilities.jl diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 50b7d167..2f0e223b 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -39,8 +39,6 @@ upper(p::IntervalProbabilities) upper(p::IntervalProbabilities, i, j) gap(p::IntervalProbabilities) gap(p::IntervalProbabilities, i, j) -sum_lower(p::IntervalProbabilities) 
-sum_lower(p::IntervalProbabilities, j) num_source(p::IntervalProbabilities) num_target(p::IntervalProbabilities) axes_source(p::IntervalProbabilities) diff --git a/src/Data/bmdp-tool.jl b/src/Data/bmdp-tool.jl index 38c837ce..3a3e622e 100644 --- a/src/Data/bmdp-tool.jl +++ b/src/Data/bmdp-tool.jl @@ -48,7 +48,7 @@ function read_bmdp_tool_file(path) end probs = Vector{ - IntervalProbabilities{ + IntervalAmbiguitySet{ Float64, Vector{Float64}, SparseArrays.FixedSparseCSC{Float64, Int32}, @@ -97,7 +97,7 @@ function read_bmdp_tool_file(path) probs_lower = probs_lower[:, actions_to_keep] probs_upper = probs_upper[:, actions_to_keep] - probs[j + 1] = IntervalProbabilities(; lower = probs_lower, upper = probs_upper) + probs[j + 1] = IntervalAmbiguitySet(; lower = probs_lower, upper = probs_upper) end action_list_per_state = collect(0:(number_actions - 1)) diff --git a/src/Data/intervalmdp.jl b/src/Data/intervalmdp.jl index 35169ace..8f89c824 100644 --- a/src/Data/intervalmdp.jl +++ b/src/Data/intervalmdp.jl @@ -60,7 +60,7 @@ function read_intervalmdp_jl_model(model_path) upper_nzval, ) - prob = IntervalProbabilities(; lower = P̲, upper = P̅) + prob = IntervalAmbiguitySet(; lower = P̲, upper = P̅) stateptr = convert.(Int32, dataset["stateptr"][:]) return IntervalMarkovDecisionProcess(prob, stateptr, initial_states) diff --git a/src/Data/prism.jl b/src/Data/prism.jl index 8e0f466d..6d9cd417 100644 --- a/src/Data/prism.jl +++ b/src/Data/prism.jl @@ -377,7 +377,7 @@ function read_prism_transitions_file(tra_path, num_states) probs_upper_nzval, ) - probs = IntervalProbabilities(; lower = probs_lower, upper = probs_upper) + probs = IntervalAmbiguitySet(; lower = probs_lower, upper = probs_upper) return probs, stateptr end diff --git a/src/bellman.jl b/src/bellman.jl index 0862a01b..beb616c4 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -247,7 +247,7 @@ function _bellman_helper!( strategy_cache::AbstractStrategyCache, Vres, V, - prob::IntervalProbabilities, + 
prob::IntervalAmbiguitySet, stateptr; upper_bound = false, maximize = true, @@ -277,7 +277,7 @@ function _bellman_helper!( strategy_cache::AbstractStrategyCache, Vres, V, - prob::IntervalProbabilities, + prob::IntervalAmbiguitySet, stateptr; upper_bound = false, maximize = true, diff --git a/src/cuda.jl b/src/cuda.jl index b77d216d..e848591c 100644 --- a/src/cuda.jl +++ b/src/cuda.jl @@ -31,7 +31,7 @@ function checkdevice(v::AbstractArray, system::IntervalMarkovProcess) checkdevice(v, transition_prob(system)) end -function checkdevice(v::AbstractArray, p::IntervalProbabilities) +function checkdevice(v::AbstractArray, p::IntervalAmbiguitySet) # Lower and gap are required to be the same type. checkdevice(v, lower(p)) end diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl new file mode 100644 index 00000000..3848ee0e --- /dev/null +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -0,0 +1,83 @@ + +struct FactoredRobustMarkovDecisionProcess{ + N, + M, + P <: NTuple{N, <:AbstractMarginal}, + VI <: InitialStates, +} <: IntervalMarkovProcess + state_vars::NTuple{N, Int32} # N is the number of state variables and state_vars[n] is the number of states for state variable n + action_vars::NTuple{M, Int32} # M is the number of action variables and action_vars[m] is the number of actions for action variable m + + transition::P + initial_states::VI + + function FactoredRobustMarkovDecisionProcess( + state_vars::NTuple{N, Int32}, + action_vars::NTuple{M, Int32}, + transition::P, + initial_states::VI = nothing, + ) where {N, M, P <: NTuple{N, <:AbstractMarginal}, VI <: InitialStates{N}} + check_rmdp(state_vars, action_vars, transition, initial_states) + + return new{N, M, P, VI}(state_vars, action_vars, transition, initial_states) + end +end + +function check_rmdp(state_vars, action_vars, transition, initial_states) + check_state_variables(state_vars) + check_action_variables(action_vars) + 
check_transition(state_vars, action_vars, transition) + check_initial_states(state_vars, initial_states) +end + +function check_state_variables(state_vars) + if any(x -> x <= 0, state_vars) + throw(ArgumentError("All state variables must be positive integers.")) + end +end + +function check_action_variables(action_vars) + if any(x -> x <= 0, action_vars) + throw(ArgumentError("All action variables must be positive integers.")) + end +end + +function check_transition(state_dims, action_dims, transition) + for (i, marginal) in enumerate(transition) + if num_target(marginal) != state_dims[i] + throw(DimensionMismatch("Marginal $i has incorrect number of target states. Expected $(state_dims[i]), got $(num_target(marginal)).")) + end + + if source_shape(marginal) != state_dims[state_variables(marginal)] + throw(DimensionMismatch("Marginal $i has incorrect source shape. Expected $state_dims, got $(source_shape(marginal)).")) + end + + if action_shape(marginal) != action_dims[action_variables(marginal)] + throw(DimensionMismatch("Marginal $i has incorrect action shape. 
Expected $action_dims, got $(action_shape(marginal)).")) + end + end +end + +function check_initial_states(state_vars, initial_states) + if initial_states isa AllStates + return + end + + N = length(state_vars) + for initial_state in initial_states + if length(initial_state) != N + throw(DimensionMismatch("Each initial state must have length $N.")) + end + + if !all(1 .<= initial_state .<= state_vars) + throw(DimensionMismatch("Each initial state must be within the valid range of states (should be 1 .<= initial_state <= $state_vars, was initial_state=$initial_state).")) + end + end +end + +state_variables(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.state_vars +action_variables(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.action_vars +num_states(rmdp::FactoredRobustMarkovDecisionProcess) = prod(state_variables(rmdp)) +num_actions(rmdp::FactoredRobustMarkovDecisionProcess) = prod(action_variables(rmdp)) + +source_shape(m::FactoredRobustMarkovDecisionProcess) = m.state_vars \ No newline at end of file diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl deleted file mode 100644 index 7916d802..00000000 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ /dev/null @@ -1,185 +0,0 @@ -""" - IntervalMarkovDecisionProcess{ - P <: IntervalProbabilities, - VT <: AbstractVector{Int32}, - VI <: Union{AllStates, AbstractVector} - } - -A type representing (stationary) Interval Markov Decision Processes (IMDP), which are Markov Decision Processes with uncertainty in the form of intervals on -the transition probabilities. - -Formally, let ``(S, S_0, A, \\Gamma)`` be an interval Markov decision process, where -- ``S`` is the set of states, -- ``S_0 \\subseteq S`` is the set of initial states, -- ``A`` is the set of actions, and -- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S, a \\in A}`` is a set of interval ambiguity sets on the transition probabilities, for each source-action pair. 
- -Then the ```IntervalMarkovDecisionProcess``` type is defined as follows: indices `1:num_states` are the states in ``S``, -`transition_prob` represents ``\\Gamma``, actions are implicitly defined by `stateptr` (e.g. if `stateptr[3] == 4` and `stateptr[4] == 7` then -the actions available to state 3 are `[1, 2, 3]`), and `initial_states` is the set of initial states ``S_0``. If no initial states are specified, -then the initial states are assumed to be all states in ``S`` represented by `AllStates`. See [`IntervalProbabilities`](@ref) and [Theory](@ref) for more information -on the structure of the transition probability ambiguity sets. - -### Fields -- `transition_prob::P`: interval on transition probabilities where columns represent source/action pairs and rows represent target states. -- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[:, stateptr[j]:stateptr[j + 1] - 1]` is the transition - probability matrix for source state `j`) in the style of colptr for sparse matrices in CSC format. -- `initial_states::VI`: initial states. -- `num_states::Int32`: number of states. - -### Examples - -```jldoctest -transition_probs = IntervalProbabilities(; - lower = [ - 0.0 0.5 0.1 0.2 0.0 - 0.1 0.3 0.2 0.3 0.0 - 0.2 0.1 0.3 0.4 1.0 - ], - upper = [ - 0.5 0.7 0.6 0.6 0.0 - 0.6 0.5 0.5 0.5 0.0 - 0.7 0.3 0.4 0.4 1.0 - ], -) - -stateptr = [1, 3, 5, 6] -initial_states = [1] - -mdp = IntervalMarkovDecisionProcess(transition_probs, stateptr, initial_states) -``` - -There is also a constructor for `IntervalMarkovDecisionProcess` where the transition probabilities are given as a list of -transition probabilities for each source state. 
- -```jldoctest -prob1 = IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], -) - -prob2 = IntervalProbabilities(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], -) - -prob3 = IntervalProbabilities(; - lower = [0.0; 0.0; 1.0], - upper = [0.0; 0.0; 1.0] -) - -transition_probs = [prob1, prob2, prob3] -initial_states = [1] - -mdp = IntervalMarkovDecisionProcess(transition_probs, initial_states) -``` - -""" -struct IntervalMarkovDecisionProcess{ - P <: IntervalProbabilities, - VT <: AbstractVector{Int32}, - VI <: InitialStates, -} <: IntervalMarkovProcess - transition_prob::P - stateptr::VT - initial_states::VI - num_states::Int32 -end - -function IntervalMarkovDecisionProcess( - transition_prob::IntervalProbabilities, - stateptr::AbstractVector{Int32}, - initial_states::InitialStates = AllStates(), -) - num_states = checksize_imdp(transition_prob, stateptr) - - return IntervalMarkovDecisionProcess( - transition_prob, - stateptr, - initial_states, - num_states, - ) -end - -function IntervalMarkovDecisionProcess( - transition_probs::Vector{<:IntervalProbabilities}, - initial_states::InitialStates = AllStates(), -) - transition_prob, stateptr = interval_prob_hcat(transition_probs) - - return IntervalMarkovDecisionProcess(transition_prob, stateptr, initial_states) -end - -""" - IntervalMarkovChain(transition_prob::IntervalProbabilities, initial_states::InitialStates = AllStates()) - -Construct an Interval Markov Chain from a square matrix pair of interval transition probabilities. The initial states are optional and if not specified, -all states are assumed to be initial states. The number of states is inferred from the size of the transition probability matrix. - -The returned type is an `IntervalMarkovDecisionProcess` with only one action per state (i.e. `stateptr[j + 1] - stateptr[j] == 1` for all `j`). 
-This is done to unify the interface for value iteration. -""" -function IntervalMarkovChain( - transition_prob::IntervalProbabilities, - initial_states::InitialStates = AllStates(), -) - stateptr = UnitRange{Int32}(1, num_source(transition_prob) + 1) - return IntervalMarkovDecisionProcess(transition_prob, stateptr, initial_states) -end - -function checksize_imdp(p::IntervalProbabilities, stateptr::AbstractVector{Int32}) - num_states = length(stateptr) - 1 - - min_actions = mindiff(stateptr) - if any(min_actions <= 0) - throw(ArgumentError("The number of actions per state must be positive.")) - end - - if num_states > num_target(p) - throw( - DimensionMismatch( - "The number of target states ($(num_target(p))) is less than the number of states in the stateptr $(num_states).", - ), - ) - end - - if stateptr[end] - 1 != num_source(p) - throw( - DimensionMismatch( - "The number of source states ($(num_source(p))) must be equal to the number of states in the stateptr $(num_states).", - ), - ) - end - - return Int32(num_target(p)) -end - -""" - stateptr(mdp::IntervalMarkovDecisionProcess) - -Return the state pointer of the Interval Markov Decision Process. The state pointer is a vector of integers where the `i`-th element -is the index of the first element of the `i`-th state in the transition probability matrix. -I.e. `transition_prob[:, stateptr[j]:stateptr[j + 1] - 1]` is the transition probability matrix for source state `j`. 
-""" -stateptr(mdp::IntervalMarkovDecisionProcess) = mdp.stateptr - -max_actions(mdp::IntervalMarkovDecisionProcess) = maxdiff(stateptr(mdp)) -Base.ndims(::IntervalMarkovDecisionProcess) = one(Int32) -product_num_states(mp::IntervalMarkovDecisionProcess) = (num_states(mp),) -source_shape(mp::IntervalMarkovDecisionProcess) = (length(stateptr(mp)) - 1,) diff --git a/src/models/MixtureIntervalMarkovDecisionProcess.jl b/src/models/MixtureIntervalMarkovDecisionProcess.jl deleted file mode 100644 index 6fafb014..00000000 --- a/src/models/MixtureIntervalMarkovDecisionProcess.jl +++ /dev/null @@ -1,169 +0,0 @@ -""" - MixtureIntervalMarkovDecisionProcess{ - P <: IntervalProbabilities, - VT <: AbstractVector{Int32}, - VI <: Union{AllStates, AbstractVector} - } - -A type representing (stationary) Mixture Interval Markov Decision Processes (OIMDP), which are IMDPs where the transition -probabilities for each state can be represented as the product of the transition probabilities of individual processes. - -Formally, let ``(S, S_0, A, \\Gamma, \\Gamma_\\alpha)`` be an interval Markov decision processes, where -- ``S = S_1 \\times \\cdots \\times S_n`` is the set of joint states with ``S_i`` the set of states for the `i`-th marginal, -- ``S_0 \\subseteq S`` is the set of initial states, -- ``A`` is the set of actions, -- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S, a \\in A}`` is a set of interval ambiguity sets on the transition probabilities, - for each source-action pair, with ``\\Gamma_{s,a} = \\bigotimes_{i=1}^n \\Gamma_{s,a}^i`` and ``\\Gamma_{s,a}^i`` is a marginal interval ambiguity sets - on the ``i``-th marginal, and -- ``\\Gamma^\\alpha = \\{\\Gamma^\\alpha_{s,a}\\}_{s \\in S, a \\in A}`` is the interval ambiguity set for the mixture. - -Then the ```MixtureIntervalMarkovDecisionProcess``` type is defined as follows: indices `1:num_states` are the states in ``S`` and -`transition_prob` represents ``\\Gamma`` and ``\\Gamma^\\alpha``. 
Actions are implicitly defined by `stateptr` (e.g. if `source_dims` in `transition_prob` -is `(2, 3, 2)`, and `stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state `CartesianIndex(1, 2, 1)` are `[1, 2, 3]`), and `initial_states` -is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S`` -represented by `AllStates`. See [`MixtureIntervalProbabilities`](@ref) and [Theory](@ref) for more information on the structure -of the transition probability ambiguity sets. - -### Fields -- `transition_prob::P`: ambiguity set on transition probabilities (see [`MixtureIntervalProbabilities`](@ref) for the structure). -- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[k][l][:, stateptr[j]:stateptr[j + 1] - 1]` is the transition - probability matrix for source state `j` for each model `k` and axis `l`) in the style of colptr for sparse matrices in CSC format. -- `initial_states::VI`: initial states. -- `num_states::Int32`: number of states. - -### Examples -The following example is a simple mixture of two `OrthogonalIntervalProbabilities` with one dimension and the same source/action pairs. -The first state has two actions and the second state has one action. The weighting ambiguity set is also specified for the same three source-action pairs. 
- -```jldoctest -prob1 = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; - lower = [ - 0.0 0.5 0.1 - 0.1 0.3 0.2 - ], - upper = [ - 0.5 0.7 0.6 - 0.7 0.4 0.8 - ], - ), - ), - (Int32(2),), -) -prob2 = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; - lower = [ - 0.1 0.4 0.2 - 0.3 0.0 0.1 - ], - upper = [ - 0.4 0.6 0.5 - 0.7 0.5 0.7 - ], - ), - ), - (Int32(2),), -) -weighting_probs = IntervalProbabilities(; lower = [ - 0.3 0.5 0.4 - 0.4 0.3 0.2 -], upper = [ - 0.8 0.7 0.7 - 0.7 0.5 0.4 -]) -mixture_prob = MixtureIntervalProbabilities((prob1, prob2), weighting_probs) - -stateptr = Int32[1, 3, 4] -mdp = MixtureIntervalMarkovDecisionProcess(mixture_prob, stateptr) -``` -""" -struct MixtureIntervalMarkovDecisionProcess{ - P <: MixtureIntervalProbabilities, - VT <: AbstractVector{Int32}, - VI <: InitialStates, -} <: IntervalMarkovProcess - transition_prob::P - stateptr::VT - initial_states::VI - num_states::Int32 -end - -function MixtureIntervalMarkovDecisionProcess( - transition_prob::MixtureIntervalProbabilities, - stateptr::AbstractVector{Int32}, - initial_states::InitialStates = AllStates(), -) - num_states = checksize_imdp(transition_prob, stateptr) - - return MixtureIntervalMarkovDecisionProcess( - transition_prob, - stateptr, - initial_states, - num_states, - ) -end - -function MixtureIntervalMarkovDecisionProcess( - transition_probs::Vector{<:MixtureIntervalProbabilities}, - initial_states::InitialStates = AllStates(), -) - # TODO: Fix - transition_prob, stateptr = interval_prob_hcat(transition_probs) - - return MixtureIntervalMarkovDecisionProcess(transition_prob, stateptr, initial_states) -end - -""" - MixtureIntervalMarkovChain(transition_prob::MixtureIntervalProbabilities, initial_states::InitialStates = AllStates()) - -Construct a Mixture Interval Markov Chain from mixture interval transition probabilities. The initial states are optional and if not specified, -all states are assumed to be initial states. 
The number of states is inferred from the size of the transition probability matrix. - -The returned type is an `MixtureIntervalMarkovDecisionProcess` with only one action per state (i.e. `stateptr[j + 1] - stateptr[j] == 1` for all `j`). -This is done to unify the interface for value iteration. -""" -function MixtureIntervalMarkovChain( - transition_prob::MixtureIntervalProbabilities, - initial_states::InitialStates = AllStates(), -) - stateptr = UnitRange{Int32}(1, num_source(transition_prob) + 1) - return MixtureIntervalMarkovDecisionProcess(transition_prob, stateptr, initial_states) -end - -function checksize_imdp(p::MixtureIntervalProbabilities, stateptr::AbstractVector{Int32}) - num_states = length(stateptr) - 1 - - min_actions = mindiff(stateptr) - if any(min_actions <= 0) - throw(ArgumentError("The number of actions per state must be positive.")) - end - - if num_states > prod(num_target, first(p)) - throw( - DimensionMismatch( - "The number of target states ($(prod(num_target, first(p))) = $(map(num_target, first(p)))) is less than the number of states in the problem $(num_states).", - ), - ) - end - # TODO:: Check that source_dims match stateptr - - return Int32(prod(num_target, first(p))) -end - -""" - stateptr(mdp::MixtureIntervalMarkovDecisionProcess) - -Return the state pointer of the Interval Markov Decision Process. The state pointer is a vector of integers where the `i`-th element -is the index of the first element of the `i`-th state in the transition probability matrix. -I.e. `mixture_probs(transition_prob)[k][l][:, stateptr[j]:stateptr[j + 1] - 1]` is the independent transition probability matrix for (flattened) source state `j` -for axis `l` and model `k`, and `mixture_probs(transition_prob)[:, stateptr[j]:stateptr[j + 1] - 1]` is the weighting matrix for `j`. 
-""" -stateptr(mdp::MixtureIntervalMarkovDecisionProcess) = mdp.stateptr - -max_actions(mdp::MixtureIntervalMarkovDecisionProcess) = maxdiff(stateptr(mdp)) -Base.ndims(::MixtureIntervalMarkovDecisionProcess{N}) where {N} = Int32(N) -product_num_states(mp::MixtureIntervalMarkovDecisionProcess) = - num_target(transition_prob(mp)) -source_shape(mp::MixtureIntervalMarkovDecisionProcess) = source_shape(transition_prob(mp)) diff --git a/src/models/OrthogonalIntervalMarkovDecisionProcess.jl b/src/models/OrthogonalIntervalMarkovDecisionProcess.jl deleted file mode 100644 index 9d63f5da..00000000 --- a/src/models/OrthogonalIntervalMarkovDecisionProcess.jl +++ /dev/null @@ -1,143 +0,0 @@ -""" - OrthogonalIntervalMarkovDecisionProcess{ - P <: OrthogonalIntervalProbabilities, - VT <: AbstractVector{Int32}, - VI <: Union{AllStates, AbstractVector} - } - -A type representing (stationary) Orthogonal Interval Markov Decision Processes (OIMDP), which are IMDPs where the transition -probabilities for each state can be represented as the product of the transition probabilities of individual processes. - -Formally, let ``(S, S_0, A, \\Gamma)`` be an orthogonal interval Markov decision process [1], where -- ``S = S_1 \\times \\cdots \\times S_n`` is the set of joint states with ``S_i`` the set of states for the `i`-th marginal, -- ``S_0 \\subseteq S`` is the set of initial states, -- ``A`` is the set of actions, and -- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S, a \\in A}`` is a set of interval ambiguity sets on the transition probabilities, - for each source-action pair, with ``\\Gamma_{s,a} = \\bigotimes_{i=1}^n \\Gamma_{s,a}^i`` and ``\\Gamma_{s,a}^i`` is a marginal interval ambiguity sets - on the ``i``-th marginal. - -Then the ```OrthogonalIntervalMarkovDecisionProcess``` type is defined as follows: indices `1:num_states` are the states in ``S`` and -`transition_prob` represents ``\\Gamma``. Actions are implicitly defined by `stateptr` (e.g. 
if `source_dims` in `transition_prob` is `(2, 3, 2)`, and -`stateptr[3] == 4` and `stateptr[4] == 7` then the actions available to state `CartesianIndex(1, 2, 1)` are `[1, 2, 3]`), and `initial_states` -is the set of initial states ``S_0``. If no initial states are specified, then the initial states are assumed to be all states in ``S`` -represented by `AllStates`. See [`OrthogonalIntervalProbabilities`](@ref) and [Theory](@ref) for more information on the structure -of the transition probability ambiguity sets. - -### Fields -- `transition_prob::P`: interval on transition probabilities where columns represent source/action pairs and rows represent target states along each marginal. -- `stateptr::VT`: pointer to the start of each source state in `transition_prob` (i.e. `transition_prob[l][:, stateptr[j]:stateptr[j + 1] - 1]` is the transition - probability matrix for source state `j` for each axis `l`) in the style of colptr for sparse matrices in CSC format. -- `initial_states::VI`: initial states. -- `num_states::Int32`: number of states. - -### Examples -Assume that `prob1`, `prob2`, and `prob3` are `IntervalProbabilities` for the first, second, and third axis, respectively, defined as the example -in [`OrthogonalIntervalProbabilities`](@ref). Then the following code constructs an `OrthogonalIntervalMarkovDecisionProcess` with three axes of three states each. -The number of actions per state is one, i.e. the model is a Markov chain. Therefore, the `stateptr` is a unit range `1:num_states + 1` and we can call -the convenience constructor `OrthogonalIntervalMarkovChain` instead. - -```jldoctest -prob = OrthogonalIntervalProbabilities((prob1, prob2, prob3), (Int32(3), Int32(3), Int32(3))) -mc = OrthogonalIntervalMarkovChain(prob) -``` - -[1] Mathiesen, F. B., Haesaert, S., & Laurenti, L. (2024). Scalable control synthesis for stochastic systems via structural IMDP abstractions. arXiv preprint arXiv:2411.11803. 
- -""" -struct OrthogonalIntervalMarkovDecisionProcess{ - P <: OrthogonalIntervalProbabilities, - VT <: AbstractVector{Int32}, - VI <: InitialStates, -} <: IntervalMarkovProcess - transition_prob::P - stateptr::VT - initial_states::VI - num_states::Int32 -end - -function OrthogonalIntervalMarkovDecisionProcess( - transition_prob::OrthogonalIntervalProbabilities, - stateptr::AbstractVector{Int32}, - initial_states::InitialStates = AllStates(), -) - num_states = checksize_imdp(transition_prob, stateptr) - - return OrthogonalIntervalMarkovDecisionProcess( - transition_prob, - stateptr, - initial_states, - num_states, - ) -end - -function OrthogonalIntervalMarkovDecisionProcess( - transition_probs::Vector{<:OrthogonalIntervalProbabilities}, - initial_states::InitialStates = AllStates(), -) - # TODO: Fix - transition_prob, stateptr = interval_prob_hcat(transition_probs) - - return OrthogonalIntervalMarkovDecisionProcess( - transition_prob, - stateptr, - initial_states, - ) -end - -""" - OrthogonalIntervalMarkovChain(transition_prob::OrthogonalIntervalProbabilities, initial_states::InitialStates = AllStates()) - -Construct a Orthogonal Interval Markov Chain from orthogonal interval transition probabilities. The initial states are optional and if not specified, -all states are assumed to be initial states. The number of states is inferred from the size of the transition probability matrix. - -The returned type is an `OrthogonalIntervalMarkovDecisionProcess` with only one action per state (i.e. `stateptr[j + 1] - stateptr[j] == 1` for all `j`). -This is done to unify the interface for value iteration. 
-""" -function OrthogonalIntervalMarkovChain( - transition_prob::OrthogonalIntervalProbabilities, - initial_states::InitialStates = AllStates(), -) - stateptr = UnitRange{Int32}(1, num_source(transition_prob) + 1) - return OrthogonalIntervalMarkovDecisionProcess( - transition_prob, - stateptr, - initial_states, - ) -end - -function checksize_imdp(p::OrthogonalIntervalProbabilities, stateptr::AbstractVector{Int32}) - num_states = length(stateptr) - 1 - - min_actions = mindiff(stateptr) - if any(min_actions <= 0) - throw(ArgumentError("The number of actions per state must be positive.")) - end - - if num_states > prod(num_target, p.probs) - throw( - DimensionMismatch( - "The number of target states ($(prod(num_target, p.probs)) = $(map(num_target, p.probs))) is less than the number of states in the problem $(num_states).", - ), - ) - end - - # TODO:: Check that source_dims match stateptr - - return Int32(prod(num_target, p.probs)) -end - -""" - stateptr(mdp::OrthogonalIntervalMarkovDecisionProcess) - -Return the state pointer of the Interval Markov Decision Process. The state pointer is a vector of integers where the `i`-th element -is the index of the first element of the `i`-th state in the transition probability matrix. -I.e. `transition_prob[l][:, stateptr[j]:stateptr[j + 1] - 1]` is the transition probability matrix for (flattened) source state `j` for axis `l`. 
-""" -stateptr(mdp::OrthogonalIntervalMarkovDecisionProcess) = mdp.stateptr - -max_actions(mdp::OrthogonalIntervalMarkovDecisionProcess) = maxdiff(stateptr(mdp)) -Base.ndims(::OrthogonalIntervalMarkovDecisionProcess{N}) where {N} = Int32(N) -product_num_states(mp::OrthogonalIntervalMarkovDecisionProcess) = - num_target(transition_prob(mp)) -source_shape(mp::OrthogonalIntervalMarkovDecisionProcess) = - source_shape(transition_prob(mp)) diff --git a/src/models/models.jl b/src/models/models.jl index d7c15c18..e5a78432 100644 --- a/src/models/models.jl +++ b/src/models/models.jl @@ -2,16 +2,11 @@ abstract type StochasticProcess end include("IntervalMarkovProcess.jl") export IntervalMarkovProcess, AllStates -export transition_prob, num_states, initial_states, stateptr, tomarkovchain +export num_states, num_actions, initial_states -include("IntervalMarkovDecisionProcess.jl") -export IntervalMarkovDecisionProcess, IntervalMarkovChain - -include("OrthogonalIntervalMarkovDecisionProcess.jl") -export OrthogonalIntervalMarkovDecisionProcess, OrthogonalIntervalMarkovChain - -include("MixtureIntervalMarkovDecisionProcess.jl") -export MixtureIntervalMarkovDecisionProcess, MixtureIntervalMarkovChain +include("FactoredRobustMarkovDecisionProcess.jl") +const FactoredRMDP = FactoredRobustMarkovDecisionProcess +export FactoredRobustMarkovDecisionProcess, state_variables, action_variables include("DeterministicAutomaton.jl") diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl new file mode 100644 index 00000000..85f51267 --- /dev/null +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -0,0 +1,189 @@ +""" + IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}, N, M, I} + +A matrix pair to represent the lower and upper bound transition probabilities from all source/action pairs to all target states. +The matrices can be `Matrix{R}` or `SparseMatrixCSC{R}`, or their CUDA equivalents. 
For memory efficiency, it is recommended to use sparse matrices. + +The columns represent the source and the rows represent the target, as if the probability matrix was a linear transformation. +Mathematically, let ``P`` be the probability matrix. Then ``P_{ij}`` represents the probability of transitioning from state ``j`` (or with state/action pair ``j``) to state ``i``. +Due to the column-major format of Julia, this is also a more efficient representation (in terms of cache locality). + +The lower bound is explicitly stored, while the upper bound is computed from the lower bound and the gap. This choice is +because it simplifies repeated probability assignment using O-maximization [1]. + +### Fields +- `lower::MR`: The lower bound transition probabilities from a source state or source/action pair to a target state. +- `gap::MR`: The gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. +- `sum_lower::VR`: The sum of lower bound transition probabilities from a source state or source/action pair to all target states. + +### Examples +```jldoctest +dense_prob = IntervalAmbiguitySets(; + lower = [0.0 0.5; 0.1 0.3; 0.2 0.1], + upper = [0.5 0.7; 0.6 0.5; 0.7 0.3], +) + +sparse_prob = IntervalAmbiguitySets(; + lower = sparse_hcat( + SparseVector(15, [4, 10], [0.1, 0.2]), + SparseVector(15, [5, 6, 7], [0.5, 0.3, 0.1]), + ), + upper = sparse_hcat( + SparseVector(15, [1, 4, 10], [0.5, 0.6, 0.7]), + SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]), + ), +) +``` + +[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. 
+ +""" +struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}, N, M, I <: LinearIndices} <: AbstractMarginal + lower::MR + gap::MR + + state_indices::NTuple{N, Int32} + action_indices::NTuple{M, Int32} + + source_dims::NTuple{N, Int32} + action_dims::NTuple{M, Int32} + linear_index::I + + function IntervalAmbiguitySets(lower::MR, gap::MR, state_indices, action_indices, source_dims, action_dims) where {R, MR <: AbstractMatrix{R}} + checkprobabilities!(lower, gap) + + linear_index = LinearIndices((source_dims..., action_dims...)) + return IntervalAmbiguitySets(lower, gap, state_indices, action_indices, source_dims, action_dims, linear_index) + end +end + +# Constructor for upper and lower bounds +# Constructor if no state/action indices are given (i.e. only one state and one action variable) + +function checkprobabilities!(lower::AbstractMatrix, gap::AbstractMatrix) + @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." + @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." + @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." + + sum_lower = vec(sum(lower; dims = 1)) + max_lower_bound = maximum(sum_lower) + @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." + + sum_upper = vec(sum(lower + gap; dims = 1)) + max_upper_bound = minimum(sum_upper) + @assert max_upper_bound >= 1 "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1." +end + +function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) + @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative." + @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative." 
+    # NOTE(review): this assumes `lower` and `gap` share the same sparsity
+    # pattern, which `compute_gap` guarantees — confirm for other paths.
+    @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."
+
+    sum_lower = vec(sum(lower; dims = 1))
+    max_lower_bound = maximum(sum_lower)
+    @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1."
+
+    sum_upper = vec(sum(lower + gap; dims = 1))
+    min_upper_bound = minimum(sum_upper)
+    @assert min_upper_bound >= 1 "The joint upper bound transition probability per column (min is $min_upper_bound) should be greater than or equal to 1."
+end
+
+# Sanity checks for the state/action variable indices and dimensions.
+function checkindices(
+    state_indices::NTuple{N, Int32},
+    action_indices::NTuple{M, Int32},
+    source_dims::NTuple{N, Int32},
+    action_dims::NTuple{M, Int32},
+) where {N, M}
+    # TODO: More checks
+    @assert all(state_indices .> 0) "State indices must be positive."
+    @assert all(action_indices .> 0) "Action indices must be positive."
+
+    # Implied by the NTuple{N}/NTuple{M} signature; kept as documentation.
+    @assert length(state_indices) == length(source_dims) "Length of state indices must match length of source dimensions."
+    @assert length(action_indices) == length(action_dims) "Length of action indices must match length of action dimensions."
+
+    total_source = prod(source_dims)
+    total_action = prod(action_dims)
+
+    @assert all(state_indices .<= total_source) "State indices must not exceed total number of source states ($total_source)."
+    @assert all(action_indices .<= total_action) "Action indices must not exceed total number of actions ($total_action)."
+end
+
+# Keyword constructor from lower and upper bounds.
+function IntervalAmbiguitySets(; lower::MR, upper::MR) where {MR <: AbstractMatrix}
+    lower, gap = compute_gap(lower, upper)
+    # No state/action structure given: default to a single state variable with
+    # one source per column and a single trivial action variable. (The previous
+    # 2-argument call had no matching method.)
+    return IntervalAmbiguitySets(lower, gap, (Int32(1),), (Int32(1),), (Int32(size(lower, 2)),), (Int32(1),))
+end
+
+# Dense case: the gap is simply the elementwise difference.
+function compute_gap(lower::MR, upper::MR) where {MR <: AbstractMatrix}
+    gap = upper - lower
+    return lower, gap
+end
+
+# Sparse case: rebuild lower and gap on the sparsity pattern of `upper`.
+# NOTE(review): entries of `lower` outside the pattern of `upper` are dropped
+# silently — confirm callers guarantee pattern(lower) ⊆ pattern(upper).
+function compute_gap(
+    lower::MR,
+    upper::MR,
+) where {R, MR <: SparseArrays.AbstractSparseMatrixCSC{R}}
+    I, J, _ = findnz(upper)
+
+    gap_nonzeros = Vector{R}(undef, length(I))
+    lower_nonzeros = Vector{R}(undef, length(I))
+
+    for (k, (i, j)) in enumerate(zip(I, J))
+        gap_nonzeros[k] = upper[i, j] - lower[i, j]
+        lower_nonzeros[k] = lower[i, j]
+    end
+
+    gap = SparseArrays.FixedSparseCSC(
+        size(upper)...,
+        upper.colptr,
+        upper.rowval,
+        gap_nonzeros,
+    )
+    lower = SparseArrays.FixedSparseCSC(
+        size(upper)...,
+        upper.colptr,
+        upper.rowval,
+        lower_nonzeros,
+    )
+    return lower, gap
+end
+
+# Accessors. (The field is `state_indices`; `p.source_indices` did not exist.)
+state_variables(p::IntervalAmbiguitySets) = p.state_indices
+action_variables(p::IntervalAmbiguitySets) = p.action_indices
+source_shape(p::IntervalAmbiguitySets) = p.source_dims
+action_shape(p::IntervalAmbiguitySets) = p.action_dims
+num_target(p::IntervalAmbiguitySets) = size(p.lower, 1)
+
+# A single ambiguity set: one column of lower/gap bounds (typically views).
+struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}}
+    lower::VR
+    gap::VR
+end
+
+function Base.getindex(p::IntervalAmbiguitySets, source::CartesianIndex, action::CartesianIndex)
+    # Project the joint source/action indices onto the variables this marginal
+    # depends on. Tuples cannot be indexed by tuples, so select component-wise.
+    src = Tuple(source)
+    act = Tuple(action)
+    source_idx = map(i -> src[i], p.state_indices)
+    action_idx = map(i -> act[i], p.action_indices)
+    j = p.linear_index[source_idx..., action_idx...]
+
+    # TODO: Consider ways to avoid specifying/allocating self-loops
+
+    # Select by columns only!
+    l = @view p.lower[:, j]
+    g = @view p.gap[:, j]
+
+    return IntervalAmbiguitySet(l, g)
+end
+
+lower(p::IntervalAmbiguitySet) = p.lower
+lower(p::IntervalAmbiguitySet, destination) = p.lower[destination]
+
+# NOTE: allocates — only lower and gap are stored; upper is derived.
+upper(p::IntervalAmbiguitySet) = p.lower + p.gap
+upper(p::IntervalAmbiguitySet, destination) = p.lower[destination] + p.gap[destination]
+
+gap(p::IntervalAmbiguitySet) = p.gap
+gap(p::IntervalAmbiguitySet, destination) = p.gap[destination]
+
+const ColumnView{Tv} = SubArray{Tv, 1, <:AbstractMatrix{Tv}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}}
+# Dense column: every destination is in the support. (`where {R}` was missing,
+# which is an UndefVarError at method definition.)
+support(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = eachindex(p.lower)
+
+const SparseColumnView{Tv, Ti} = SubArray{Tv, 1, <:AbstractSparseMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}}
+# Sparse column: only the stored rows of this column. `rowvals` is not defined
+# on a SubArray (and would cover the whole matrix), so go via the parent and
+# restrict to this column's nonzero range.
+support(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = view(rowvals(parent(p.lower)), nzrange(parent(p.lower), parentindices(p.lower)[2]))
\ No newline at end of file
diff --git a/src/probabilities/IntervalProbabilities.jl b/src/probabilities/IntervalProbabilities.jl
deleted file mode 100644
index ba1c3c35..00000000
--- a/src/probabilities/IntervalProbabilities.jl
+++ /dev/null
@@ -1,244 +0,0 @@
-"""
-    IntervalProbabilities{R, VR <: AbstractVector{R}, MR <: AbstractMatrix{R}}
-
-A matrix pair to represent the lower and upper bound transition probabilities from all source states or source/action pairs to all target states.
-The matrices can be `Matrix{R}` or `SparseMatrixCSC{R}`, or their CUDA equivalents. For memory efficiency, it is recommended to use sparse matrices.
-
-The columns represent the source and the rows represent the target, as if the probability matrix was a linear transformation.
-Mathematically, let ``P`` be the probability matrix. Then ``P_{ij}`` represents the probability of transitioning from state ``j`` (or with state/action pair ``j``) to state ``i``.
-Due to the column-major format of Julia, this is also a more efficient representation (in terms of cache locality).
- -The lower bound is explicitly stored, while the upper bound is computed from the lower bound and the gap. This choice is -because it simplifies repeated probability assignment using O-maximization [1]. - -### Fields -- `lower::MR`: The lower bound transition probabilities from a source state or source/action pair to a target state. -- `gap::MR`: The gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. -- `sum_lower::VR`: The sum of lower bound transition probabilities from a source state or source/action pair to all target states. - -### Examples -```jldoctest -dense_prob = IntervalProbabilities(; - lower = [0.0 0.5; 0.1 0.3; 0.2 0.1], - upper = [0.5 0.7; 0.6 0.5; 0.7 0.3], -) - -sparse_prob = IntervalProbabilities(; - lower = sparse_hcat( - SparseVector(15, [4, 10], [0.1, 0.2]), - SparseVector(15, [5, 6, 7], [0.5, 0.3, 0.1]), - ), - upper = sparse_hcat( - SparseVector(15, [1, 4, 10], [0.5, 0.6, 0.7]), - SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]), - ), -) -``` - -[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. - -""" -struct IntervalProbabilities{R, VR <: AbstractVector{R}, MR <: AbstractMatrix{R}} <: - AbstractIntervalProbabilities - lower::MR - gap::MR - - sum_lower::VR -end - -# Constructor from lower and gap with sanity assertions -function IntervalProbabilities(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} - checkprobabilities!(lower, gap) - - sum_lower = vec(sum(lower; dims = 1)) - - max_lower_bound = maximum(sum_lower) - @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." 
- - sum_upper = vec(sum(lower + gap; dims = 1)) - - max_upper_bound = minimum(sum_upper) - @assert max_upper_bound >= 1 "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1." - - return IntervalProbabilities(lower, gap, sum_lower) -end - -function checkprobabilities!(lower::AbstractMatrix, gap::AbstractMatrix) - @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." - @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." - @assert all(gap .<= 1) "The gap transition probabilities must be less than or equal to 1." - @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." -end - -function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) - @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative." - @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative." - @assert all(nonzeros(gap) .<= 1) "The gap transition probabilities must be less than or equal to 1." - @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." 
-end - -# Keyword constructor from lower and upper -function IntervalProbabilities(; lower::MR, upper::MR) where {MR <: AbstractMatrix} - lower, gap = compute_gap(lower, upper) - return IntervalProbabilities(lower, gap) -end - -function compute_gap(lower::MR, upper::MR) where {MR <: AbstractMatrix} - gap = upper - lower - return lower, gap -end - -function compute_gap( - lower::MR, - upper::MR, -) where {R, MR <: SparseArrays.AbstractSparseMatrixCSC{R}} - I, J, _ = findnz(upper) - - gap_nonzeros = Vector{R}(undef, length(I)) - lower_nonzeros = Vector{R}(undef, length(I)) - - for (k, (i, j)) in enumerate(zip(I, J)) - gap_nonzeros[k] = upper[i, j] - lower[i, j] - lower_nonzeros[k] = lower[i, j] - end - - gap = SparseArrays.FixedSparseCSC( - size(upper)..., - upper.colptr, - upper.rowval, - gap_nonzeros, - ) - lower = SparseArrays.FixedSparseCSC( - size(upper)..., - upper.colptr, - upper.rowval, - lower_nonzeros, - ) - return lower, gap -end - -# Accessors for properties of interval probabilities - -Base.size(p::IntervalProbabilities) = size(p.lower) -Base.size(p::IntervalProbabilities, dim::Integer) = size(p.lower, dim) -# Views for interval probabilities are only for the source states or source/action pairs -Base.view(p::IntervalProbabilities, J) = - IntervalProbabilities(view(lower(p), :, J), view(gap(p), :, J), view(sum_lower(p), J)) - -""" - lower(p::IntervalProbabilities) - -Return the lower bound transition probabilities from a source state or source/action pair to a target state. -""" -lower(p::IntervalProbabilities) = p.lower - -""" - lower(p::IntervalProbabilities, i, j) - -Return the lower bound transition probabilities from a source state or source/action pair to a target state. -""" -lower(p::IntervalProbabilities, i, j) = p.lower[i, j] -lower(p::IntervalProbabilities, ::Colon, j) = @view(p.lower[:, j]) - -""" - upper(p::IntervalProbabilities) - -Return the upper bound transition probabilities from a source state or source/action pair to a target state. 
- -!!! note - It is not recommended to use this function for the hot loop of O-maximization. Because the [`IntervalProbabilities`](@ref) - stores the lower and gap transition probabilities, fetching the upper bound requires allocation and computation. -""" -upper(p::IntervalProbabilities) = p.lower + p.gap - -""" - upper(p::IntervalProbabilities, i, j) - -Return the upper bound transition probabilities from a source state or source/action pair to a target state. - -!!! note - It is not recommended to use this function for the hot loop of O-maximization. Because the [`IntervalProbabilities`](@ref) - stores the lower and gap transition probabilities, fetching the upper bound requires allocation and computation. -""" -upper(p::IntervalProbabilities, i, j) = p.lower[i, j] + p.gap[i, j] - -""" - gap(p::IntervalProbabilities) - -Return the gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. -""" -gap(p::IntervalProbabilities) = p.gap - -""" - gap(p::IntervalProbabilities, i, j) - -Return the gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. -""" -gap(p::IntervalProbabilities, i, j) = p.gap[i, j] -gap(p::IntervalProbabilities, ::Colon, j) = @view(p.gap[:, j]) - -""" - sum_lower(p::IntervalProbabilities) - -Return the sum of lower bound transition probabilities from a source state or source/action pair to all target states. -This is useful in efficiently implementing O-maximization, where we start with a lower bound probability assignment -and iteratively, according to the ordering, adding the gap until the sum of probabilities is 1. -""" -sum_lower(p::IntervalProbabilities) = p.sum_lower - -""" - sum_lower(p::IntervalProbabilities, j) - -Return the sum of lower bound transition probabilities from a source state or source/action pair to all target states. 
-This is useful in efficiently implementing O-maximization, where we start with a lower bound probability assignment -and iteratively, according to the ordering, adding the gap until the sum of probabilities is 1. -""" -sum_lower(p::IntervalProbabilities, j) = p.sum_lower[j] - -""" - num_source(p::IntervalProbabilities) - -Return the number of source states or source/action pairs. -""" -num_source(p::IntervalProbabilities) = size(gap(p), 2) -source_shape(p::IntervalProbabilities) = (num_source(p),) - -""" - axes_source(p::IntervalProbabilities) - -Return the valid range of indices for the source states or source/action pairs. -""" -axes_source(p::IntervalProbabilities) = axes(gap(p), 2) - -""" - num_target(p::IntervalProbabilities) - -Return the number of target states. -""" -num_target(p::IntervalProbabilities) = size(gap(p), 1) - -Base.ndims(::IntervalProbabilities) = one(Int32) -stateptr(prob::IntervalProbabilities) = UnitRange{Int32}(1, num_source(prob) + 1) - -function interval_prob_hcat( - ps::Vector{<:IntervalProbabilities{R, VR, MR}}, -) where {R, VR, MR <: AbstractMatrix{R}} - l = mapreduce(lower, hcat, ps) - g = mapreduce(gap, hcat, ps) - - sl = mapreduce(sum_lower, vcat, ps) - - lengths = map(num_source, ps) - stateptr = Int32[1; cumsum(lengths) .+ 1] - - return IntervalProbabilities(l, g, sl), stateptr -end - -function Base.getindex(p::IntervalProbabilities, J) - # Select by columns only! 
- l = lower(p)[:, J] - g = gap(p)[:, J] - sum = sum_lower(p)[J] - - return IntervalProbabilities(l, g, sum) -end diff --git a/src/probabilities/MixtureIntervalProbabilities.jl b/src/probabilities/MixtureIntervalProbabilities.jl deleted file mode 100644 index 11abad0d..00000000 --- a/src/probabilities/MixtureIntervalProbabilities.jl +++ /dev/null @@ -1,167 +0,0 @@ -""" - MixtureIntervalProbabilities{N, P <: OrthogonalIntervalProbabilities, Q <: IntervalProbabilities} - -A tuple of `OrthogonalIntervalProbabilities` for independent transition probabilities in a mixture that all share -the same source/action pairs, and target states. See [`OrthogonalIntervalProbabilities`](@ref) for more information on the structure of the transition probabilities -for each model in the mixture. The mixture is weighted by an [`IntervalProbabilities`](@ref) ambiguity set, called `weighting_probs`. - -### Fields -- `mixture_probs::NTuple{N, P}`: A tuple of `OrthogonalIntervalProbabilities` transition probabilities along each axis. -- `weighting_probs::Q`: The weighting ambiguity set for the mixture. - -### Examples -Below is a simple example of a mixture of two `OrthogonalIntervalProbabilities` with one dimension and the same source/action pairs and target states, -and a weighting ambiguity set. 
-```jldoctest -prob1 = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], - ), - ), - (Int32(2),), -) -prob2 = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; - lower = [ - 0.1 0.4 - 0.2 0.2 - 0.3 0.0 - ], - upper = [ - 0.4 0.6 - 0.5 0.4 - 0.6 0.2 - ], - ), - ), - (Int32(2),), -) -weighting_probs = IntervalProbabilities(; lower = [ - 0.3 0.5 - 0.4 0.3 -], upper = [ - 0.8 0.7 - 0.7 0.5 -]) -mixture_prob = MixtureIntervalProbabilities((prob1, prob2), weighting_probs) -``` -""" -struct MixtureIntervalProbabilities{ - N, - P <: OrthogonalIntervalProbabilities, - Q <: IntervalProbabilities, -} <: AbstractIntervalProbabilities - mixture_probs::NTuple{N, P} - weighting_probs::Q - - function MixtureIntervalProbabilities( - mixture_probs::NTuple{N, P}, - weighting_probs::Q, - ) where {N, P <: OrthogonalIntervalProbabilities, Q <: IntervalProbabilities} - _source_shape, _num_source = - source_shape(first(mixture_probs)), num_source(first(mixture_probs)) - - for i in 2:N - source_shape_i, num_source_i = - source_shape(mixture_probs[i]), num_source(mixture_probs[i]) - - if source_shape_i != _source_shape - throw( - DimensionMismatch( - "All mixture probabilities must have the same source shape", - ), - ) - end - - if num_source_i != _num_source - throw( - DimensionMismatch( - "All mixture probabilities must have the same number of source/action pairs", - ), - ) - end - end - - if num_target(weighting_probs) != N - throw( - DimensionMismatch( - "The dimensionality of the weighting ambiguity set must be equal to the number of mixture probabilities", - ), - ) - end - - if num_source(weighting_probs) != _num_source - throw( - DimensionMismatch( - "The number of source/action pairs in the weighting ambiguity set must be equal to the number of source/action pairs in the mixture probabilities", - ), - ) - end - - new{N, P, Q}(mixture_probs, weighting_probs) - end 
-end - -""" - num_source(p::MixtureIntervalProbabilities) - -Return the number of source states or source/action pairs. -""" -num_source(p::MixtureIntervalProbabilities) = num_source(first(p.mixture_probs)) -source_shape(p::MixtureIntervalProbabilities) = source_shape(first(p.mixture_probs)) - -""" - mixture_probs(p::MixtureIntervalProbabilities) - -Return the tuple of `OrthogonalIntervalProbabilities` transition probabilities. -""" -mixture_probs(p::MixtureIntervalProbabilities) = p.mixture_probs - -""" - mixture_probs(p::MixtureIntervalProbabilities, k) - -Return ``k``-th `OrthogonalIntervalProbabilities` transition probabilities. -""" -mixture_probs(p::MixtureIntervalProbabilities, k) = p.mixture_probs[k] - -""" - weighting_probs(p::MixtureIntervalProbabilities) - -Return the `IntervalProbabilities` weighting ambiguity set. -""" -weighting_probs(p::MixtureIntervalProbabilities) = p.weighting_probs - -""" - axes_source(p::MixtureIntervalProbabilities) - -Return the valid range of indices for the source states or source/action pairs. -""" -axes_source(p::MixtureIntervalProbabilities) = axes_source(first(p.mixture_probs)) - -""" - num_target(p::MixtureIntervalProbabilities) - -Return the number of target states along each marginal. -""" -num_target(p::MixtureIntervalProbabilities) = num_target(first(p.mixture_probs)) - -stateptr(p::MixtureIntervalProbabilities) = stateptr(first(p.mixture_probs)) -Base.ndims(p::MixtureIntervalProbabilities{N}) where {N} = N - -Base.getindex(p::MixtureIntervalProbabilities, k) = mixture_probs(p, k) -Base.lastindex(p::MixtureIntervalProbabilities) = ndims(p) -Base.firstindex(p::MixtureIntervalProbabilities) = 1 -Base.length(p::MixtureIntervalProbabilities) = ndims(p) -Base.iterate(p::MixtureIntervalProbabilities) = (p[1], 2) -Base.iterate(p::MixtureIntervalProbabilities, k) = k > ndims(p) ? 
nothing : (p[k], k + 1) diff --git a/src/probabilities/OrthogonalIntervalProbabilities.jl b/src/probabilities/OrthogonalIntervalProbabilities.jl deleted file mode 100644 index 59c405ff..00000000 --- a/src/probabilities/OrthogonalIntervalProbabilities.jl +++ /dev/null @@ -1,186 +0,0 @@ -""" - OrthogonalIntervalProbabilities{N, P <: IntervalProbabilities} - -A tuple of `IntervalProbabilities` for (marginal) transition probabilities from all source/action pairs to the target states along each axis, -with target states/marginals on the rows and source states or source/action pairs on the columns. The source states are ordered in -a column-major order, i.e., the first axis of source states is the fastest, similar to the ordering of a multi-dimensional array in Julia. -E.g. for an `OrthogonalIntervalProbabilities` with `source_dims == (3, 3, 3)` and 2 actions for each source state ``\\{a_1, a_2\\}``, -the columns in order represent the collowing: -```math - ((1, 1, 1), a_1), ((1, 1, 1), a_2), (2, 1, 1), a_1), ((2, 1, 1), a_2), ..., ((3, 3, 3), a_1), ((3, 3, 3), a_2). -``` -The number of target states correspond to the number of rows in the transition probabilities of each axis. - - -### Fields -- `probs::NTuple{N, P}`: A tuple of `IntervalProbabilities` for (marginal) transition probabilities along each axis. -- `source_dims::NTuple{N, Int32}`: The dimensions of the orthogonal probabilities for the source axis. This is flattened to a single dimension for indexing. - -### Examples -An example of OrthogonalIntervalProbabilities with 3 axes and 3 states for each axis, only one action per state. -Therefore, the `source_dims` is (3, 3, 3) and the number of columns of the transition probabilities is 27. 
- -```jldoctest -lower1 = [ - 1/15 3/10 1/15 3/10 1/30 1/3 7/30 4/15 1/6 1/5 1/10 1/5 0 7/30 7/30 1/5 2/15 1/6 1/10 1/30 1/10 1/15 1/10 1/15 4/15 4/15 1/3 - 1/5 4/15 1/10 1/5 3/10 3/10 1/10 1/15 3/10 3/10 7/30 1/5 1/10 1/5 1/5 1/30 1/5 3/10 1/5 1/5 1/10 1/30 4/15 1/10 1/5 1/6 7/30 - 4/15 1/30 1/5 1/5 7/30 4/15 2/15 7/30 1/5 1/3 2/15 1/6 1/6 1/3 4/15 3/10 1/30 3/10 3/10 1/10 1/15 1/30 2/15 1/6 1/5 1/10 4/15 -] -upper1 = [ - 7/15 17/30 13/30 3/5 17/30 17/30 17/30 13/30 3/5 2/3 11/30 7/15 0 1/2 17/30 13/30 7/15 13/30 17/30 13/30 2/5 2/5 2/3 2/5 17/30 2/5 19/30 - 8/15 1/2 3/5 7/15 8/15 17/30 2/3 17/30 11/30 7/15 19/30 19/30 13/15 1/2 17/30 13/30 3/5 11/30 8/15 7/15 7/15 13/30 8/15 2/5 8/15 17/30 3/5 - 11/30 1/3 2/5 8/15 7/15 3/5 2/3 17/30 2/3 8/15 2/15 3/5 2/3 3/5 17/30 2/3 7/15 8/15 2/5 2/5 11/30 17/30 17/30 1/2 2/5 19/30 13/30 -] -prob1 = IntervalProbabilities(; lower = lower1, upper = upper1) - -lower2 = [ - 1/10 1/15 3/10 0 1/6 1/15 1/15 1/6 1/6 1/30 1/10 1/10 1/3 2/15 3/10 4/15 2/15 2/15 1/6 7/30 1/15 2/15 1/10 1/3 7/30 1/30 7/30 - 3/10 1/5 3/10 2/15 0 1/30 0 1/15 1/30 7/30 1/30 1/15 7/30 1/15 1/6 1/30 1/10 1/15 3/10 0 3/10 1/6 3/10 1/5 0 7/30 2/15 - 3/10 4/15 1/10 3/10 2/15 1/3 3/10 1/10 1/6 3/10 7/30 1/6 1/15 1/15 1/10 1/5 1/5 4/15 1/15 1/3 2/15 1/15 1/5 1/5 1/15 7/30 1/15 -] -upper2 = [ - 2/5 17/30 3/5 11/30 3/5 7/15 19/30 2/5 3/5 2/3 2/3 8/15 8/15 19/30 8/15 8/15 13/30 13/30 13/30 17/30 17/30 13/30 11/30 19/30 8/15 2/5 8/15 - 1/3 13/30 11/30 2/5 2/3 2/3 0 13/30 1/2 17/30 17/30 1/3 2/5 1/3 13/30 11/30 8/15 1/3 1/2 8/15 8/15 8/15 8/15 2/5 3/5 2/3 13/30 - 17/30 3/5 8/15 1/2 7/15 1/2 2/3 17/30 11/30 2/5 1/2 7/15 2/5 17/30 11/30 2/5 11/30 2/3 1/3 2/3 17/30 8/15 17/30 3/5 2/5 19/30 11/30 -] -prob2 = IntervalProbabilities(; lower = lower2, upper = upper2) - -lower3 = [ - 4/15 1/5 3/10 3/10 4/15 7/30 1/5 4/15 7/30 1/6 1/5 0 1/15 1/30 3/10 1/3 2/15 1/15 7/30 4/15 1/10 1/3 1/5 7/30 1/30 1/5 7/30 - 2/15 4/15 1/10 1/30 7/30 2/15 1/15 1/30 3/10 1/3 1/5 1/10 2/15 1/30 2/15 
4/15 0 4/15 1/5 4/15 1/10 1/10 1/3 7/30 3/10 1/3 3/10 - 1/5 1/3 3/10 1/10 1/15 1/10 1/30 1/5 2/15 7/30 1/3 2/15 1/10 1/6 3/10 1/5 7/30 1/30 0 1/30 1/15 2/15 1/6 7/30 4/15 4/15 7/30 -] -upper3 = [ - 3/5 17/30 1/2 3/5 19/30 2/5 8/15 1/3 11/30 2/5 17/30 13/30 2/5 3/5 3/5 11/30 1/2 11/30 2/3 17/30 3/5 7/15 19/30 1/2 3/5 1/3 19/30 - 3/5 2/3 13/30 19/30 1/3 2/5 17/30 7/15 11/30 3/5 19/30 7/15 2/5 8/15 17/30 11/30 19/30 13/30 2/3 17/30 8/15 13/30 13/30 3/5 1/2 8/15 8/15 - 3/5 2/3 1/2 1/2 2/3 7/15 3/5 3/5 1/2 1/3 2/5 8/15 2/5 11/30 1/3 8/15 7/15 13/30 0 2/5 11/30 19/30 19/30 2/5 1/2 7/15 7/15 -] -prob3 = IntervalProbabilities(; lower = lower3, upper = upper3) - -prob = OrthogonalIntervalProbabilities((prob1, prob2, prob3), (Int32(3), Int32(3), Int32(3))) -``` -""" -struct OrthogonalIntervalProbabilities{N, P <: IntervalProbabilities} <: - AbstractIntervalProbabilities - probs::NTuple{N, P} - source_dims::NTuple{N, Int32} - - function OrthogonalIntervalProbabilities( - probs::NTuple{N, P}, - source_dims::NTuple{N, Int32}, - ) where {N, P} - source_action_pairs = num_source(first(probs)) - - for i in 2:N - if num_source(probs[i]) != source_action_pairs - throw( - DimensionMismatch( - "The number of source states or source/action pairs must be the same for all axes.", - ), - ) - end - end - - new{N, P}(probs, source_dims) - end -end - -""" - lower(p::OrthogonalIntervalProbabilities, l) - -Return the lower bound transition probabilities from a source state or source/action pair to a target axis. -""" -lower(p::OrthogonalIntervalProbabilities, l) = lower(p.probs[l]) - -""" - lower(p::OrthogonalIntervalProbabilities, l, i, j) - -Return the lower bound transition probabilities from a source state or source/action pair to a target state. 
-""" -lower(p::OrthogonalIntervalProbabilities, l, i, j) = lower(p.probs[l], i, j) - -""" - upper(p::OrthogonalIntervalProbabilities, l) - -Return the upper bound transition probabilities from a source state or source/action pair to a target state. - -!!! note - It is not recommended to use this function for the hot loop of O-maximization. Because the [`IntervalProbabilities`](@ref) - stores the lower and gap transition probabilities, fetching the upper bound requires allocation and computation. -""" -upper(p::OrthogonalIntervalProbabilities, l) = upper(p.probs[l]) - -""" - upper(p::OrthogonalIntervalProbabilities, l, i, j) - -Return the upper bound transition probabilities from a source state or source/action pair to a target state. - -!!! note - It is not recommended to use this function for the hot loop of O-maximization. Because the [`IntervalProbabilities`](@ref) - stores the lower and gap transition probabilities, fetching the upper bound requires allocation and computation. -""" -upper(p::OrthogonalIntervalProbabilities, l, i, j) = upper(p.probs[l], i, j) - -""" - gap(p::OrthogonalIntervalProbabilities, l) - -Return the gap between upper and lower bound transition probabilities from a source state or source/action pair to a target axis. -""" -gap(p::OrthogonalIntervalProbabilities, l) = gap(p.probs[l]) - -""" - gap(p::OrthogonalIntervalProbabilities, l, i, j) - -Return the gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. -""" -gap(p::OrthogonalIntervalProbabilities, l, i, j) = gap(p.probs[l], i, j) - -""" - sum_lower(p::OrthogonalIntervalProbabilities, l) - -Return the sum of lower bound transition probabilities from a source state or source/action pair to all target states on one axis. 
-This is useful in efficiently implementing O-maximization, where we start with a lower bound probability assignment -and iteratively, according to the ordering, adding the gap until the sum of probabilities is 1. -""" -sum_lower(p::OrthogonalIntervalProbabilities, l) = sum_lower(p.probs[l]) - -""" - sum_lower(p::OrthogonalIntervalProbabilities, l, j) - -Return the sum of lower bound transition probabilities from a source state or source/action pair to all target states. -This is useful in efficiently implementing O-maximization, where we start with a lower bound probability assignment -and iteratively, according to the ordering, adding the gap until the sum of probabilities is 1. -""" -sum_lower(p::OrthogonalIntervalProbabilities, l, j) = sum_lower(p.probs[l], j) - -""" - num_source(p::OrthogonalIntervalProbabilities) - -Return the number of source states or source/action pairs. -""" -num_source(p::OrthogonalIntervalProbabilities) = num_source(first(p.probs)) -source_shape(p::OrthogonalIntervalProbabilities) = p.source_dims - -""" - axes_source(p::OrthogonalIntervalProbabilities) - -Return the valid range of indices for the source states or source/action pairs. -""" -axes_source(p::OrthogonalIntervalProbabilities) = axes_source(first(p.probs)) - -""" - num_target(p::OrthogonalIntervalProbabilities) - -Return the number of target states along each marginal. 
-""" -num_target(p::OrthogonalIntervalProbabilities) = ntuple(i -> num_target(p[i]), ndims(p)) - -stateptr(p::OrthogonalIntervalProbabilities) = UnitRange{Int32}(1, num_source(p) + 1) -Base.ndims(p::OrthogonalIntervalProbabilities{N}) where {N} = N - -Base.getindex(p::OrthogonalIntervalProbabilities, i) = p.probs[i] -Base.lastindex(p::OrthogonalIntervalProbabilities) = ndims(p) -Base.firstindex(p::OrthogonalIntervalProbabilities) = 1 -Base.length(p::OrthogonalIntervalProbabilities) = ndims(p) -Base.iterate(p::OrthogonalIntervalProbabilities) = (p[1], 2) -Base.iterate(p::OrthogonalIntervalProbabilities, i) = i > ndims(p) ? nothing : (p[i], i + 1) diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index 9456e601..5cf59748 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -1,15 +1,9 @@ -abstract type AbstractIntervalProbabilities end -export lower, upper, gap, sum_lower -export num_source, axes_source, num_target, axes_target +abstract type AbstractMarginal end -include("IntervalProbabilities.jl") -export IntervalProbabilities +export lower, upper, gap -include("OrthogonalIntervalProbabilities.jl") -export OrthogonalIntervalProbabilities - -include("MixtureIntervalProbabilities.jl") -export MixtureIntervalProbabilities, mixture_probs, weighting_probs +include("IntervalAmbiguitySets.jl") +export IntervalAmbiguitySets include("TransitionFunction.jl") export TransitionFunction, transition diff --git a/src/problem.jl b/src/problem.jl index 8ebe9bae..e3e98714 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -4,6 +4,7 @@ abstract type AbstractIntervalMDPProblem end # Verification # ################# +# Problem """ VerificationProblem{S <: StochasticProcess, F <: Specification, C <: AbstractStrategy} @@ -59,6 +60,7 @@ Return the strategy of a problem, if provided. 
""" strategy(prob::VerificationProblem) = prob.strategy +# Solution struct VerificationSolution{R, MR <: AbstractArray{R}, D} value_function::MR residual::MR @@ -95,6 +97,7 @@ Base.iterate(s::VerificationSolution, args...) = # Control synthesis # ##################### +# Problem """ ControlSynthesisProblem{S <: StochasticProcess, F <: Specification} @@ -132,6 +135,7 @@ Return the specification of a problem. """ specification(prob::ControlSynthesisProblem) = prob.spec +# Solution struct ControlSynthesisSolution{C <: AbstractStrategy, R, MR <: AbstractArray{R}, D} strategy::C value_function::MR diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index e6d3759a..26585d87 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -15,7 +15,7 @@ struct NoStrategyCache <: OptimizingStrategyCache end function construct_strategy_cache( ::Union{ - IntervalProbabilities, + IntervalAmbiguitySet, OrthogonalIntervalProbabilities, MixtureIntervalProbabilities, StochasticProcess, diff --git a/src/utils.jl b/src/utils.jl index 0aae0485..3ef303e9 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -11,7 +11,7 @@ arrayfactory(prob::MixtureIntervalProbabilities, T, num_states) = arrayfactory(first(prob), T, num_states) arrayfactory(prob::OrthogonalIntervalProbabilities, T, num_states) = arrayfactory(first(prob), T, num_states) -arrayfactory(prob::IntervalProbabilities, T, num_states) = +arrayfactory(prob::IntervalAmbiguitySet, T, num_states) = arrayfactory(gap(prob), T, num_states) arrayfactory(::MR, T, num_states) where {MR <: AbstractArray} = zeros(T, num_states) @@ -19,5 +19,5 @@ valuetype(mp::ProductProcess) = valuetype(markov_process(mp)) valuetype(mp::IntervalMarkovProcess) = valuetype(transition_prob(mp)) valuetype(prob::MixtureIntervalProbabilities) = valuetype(first(prob)) valuetype(prob::OrthogonalIntervalProbabilities) = valuetype(first(prob)) -valuetype(prob::IntervalProbabilities) = valuetype(gap(prob)) +valuetype(prob::IntervalAmbiguitySet) = 
valuetype(gap(prob)) valuetype(::MR) where {R, MR <: AbstractArray{R}} = R diff --git a/src/workspace.jl b/src/workspace.jl index 725336f3..ca612dde 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -88,7 +88,7 @@ The workspace type is determined by the type and size of the transition probabil as well as the number of threads available. """ function construct_workspace( - prob::IntervalProbabilities{R, VR, MR}, + prob::IntervalAmbiguitySet{R, VR, MR}, max_actions = 1; threshold = 10, ) where {R, VR, MR <: AbstractMatrix{R}} @@ -129,7 +129,7 @@ end Base.getindex(ws::ThreadedSparseWorkspace, i) = ws.thread_workspaces[i] function construct_workspace( - prob::IntervalProbabilities{R, VR, MR}, + prob::IntervalAmbiguitySet{R, VR, MR}, max_actions = 1; threshold = 10, ) where {R, VR, MR <: AbstractSparseMatrix{R}} @@ -153,7 +153,7 @@ struct DenseOrthogonalWorkspace{N, M, T <: Real} <: SimpleOrthogonalWorkspace end function DenseOrthogonalWorkspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalProbabilities{R}}, + p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R}}, max_actions, ) where {N, R} pns = num_target(p) @@ -182,7 +182,7 @@ struct ThreadedDenseOrthogonalWorkspace{N, M, T} end function ThreadedDenseOrthogonalWorkspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalProbabilities{R}}, + p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R}}, max_actions, ) where {N, R} nthreads = Threads.nthreads() @@ -222,7 +222,7 @@ The workspace type is determined by the type and size of the transition probabil as well as the number of threads available. 
""" function construct_workspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalProbabilities{R, VR, MR}}, + p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R, VR, MR}}, max_actions = 1, ) where {N, R, VR, MR <: AbstractMatrix{R}} if Threads.nthreads() == 1 @@ -243,7 +243,7 @@ scratch(ws::SparseOrthogonalWorkspace) = ws.scratch actions(ws::SparseOrthogonalWorkspace) = ws.actions function SparseOrthogonalWorkspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalProbabilities{R, VR, MR}}, + p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R, VR, MR}}, max_actions, ) where {N, R, VR, MR <: AbstractSparseMatrix{R}} max_nonzeros_per_prob = [maximum(map(nnz, eachcol(gap(pᵢ)))) for pᵢ in p] @@ -282,7 +282,7 @@ The workspace type is determined by the type and size of the transition probabil as well as the number of threads available. """ function construct_workspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalProbabilities{R, VR, MR}}, + p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R, VR, MR}}, max_actions = 1, ) where {N, R, VR, MR <: AbstractSparseMatrix{R}} if Threads.nthreads() == 1 @@ -308,7 +308,7 @@ actions(ws::MixtureWorkspace) = actions(ws.orthogonal_workspace) function MixtureWorkspace( p::MixtureIntervalProbabilities{ N, - <:OrthogonalIntervalProbabilities{M, <:IntervalProbabilities{R, VR, MR}}, + <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, }, max_actions, ) where {N, M, R, VR, MR <: AbstractMatrix{R}} @@ -326,7 +326,7 @@ end function MixtureWorkspace( p::MixtureIntervalProbabilities{ N, - <:OrthogonalIntervalProbabilities{M, <:IntervalProbabilities{R, VR, MR}}, + <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, }, max_actions, ) where {N, M, R, VR, MR <: AbstractSparseMatrix{R}} @@ -363,7 +363,7 @@ end function ThreadedMixtureWorkspace( p::MixtureIntervalProbabilities{ N, - <:OrthogonalIntervalProbabilities{M, <:IntervalProbabilities{R, VR, MR}}, + 
<:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, }, max_actions, ) where {N, M, R, VR, MR <: AbstractMatrix{R}} @@ -403,7 +403,7 @@ end function ThreadedMixtureWorkspace( p::MixtureIntervalProbabilities{ N, - <:OrthogonalIntervalProbabilities{M, <:IntervalProbabilities{R, VR, MR}}, + <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, }, max_actions, ) where {N, M, R, VR, MR <: AbstractSparseMatrix{R}} @@ -453,7 +453,7 @@ as well as the number of threads available. function construct_workspace( p::MixtureIntervalProbabilities{ N, - <:OrthogonalIntervalProbabilities{M, <:IntervalProbabilities{R, VR, MR}}, + <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, }, max_actions = 1, ) where {N, M, R, VR, MR <: Union{AbstractMatrix{R}, AbstractSparseMatrix{R}}} From 0db849f930a3d418ab481f89a395299deeac04f8 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sun, 31 Aug 2025 21:56:21 +0200 Subject: [PATCH 02/71] WIP: IMC kernels working --- Project.toml | 4 +- ext/cuda/array.jl | 43 - src/IntervalMDP.jl | 2 +- src/bellman.jl | 884 +++++++++--------- src/cuda.jl | 22 +- .../FactoredRobustMarkovDecisionProcess.jl | 65 +- src/models/IntervalMarkovChain.jl | 17 + src/models/IntervalMarkovDecisionProcess.jl | 17 + src/models/IntervalMarkovProcess.jl | 10 +- src/models/ProductProcess.jl | 8 +- src/models/models.jl | 4 +- src/probabilities/IntervalAmbiguitySets.jl | 140 ++- src/probabilities/Marginal.jl | 71 ++ src/probabilities/probabilities.jl | 25 +- src/robust_value_iteration.jl | 2 +- src/specification.jl | 4 +- src/strategy_cache.jl | 13 +- src/utils.jl | 26 +- src/workspace.jl | 467 ++------- test/base/base.jl | 16 +- test/base/bellman.jl | 28 +- test/base/vi.jl | 2 +- test/runtests.jl | 4 +- test/sparse/bellman.jl | 28 +- test/sparse/sparse.jl | 9 +- test/sparse/vi.jl | 2 +- 26 files changed, 836 insertions(+), 1077 deletions(-) create mode 100644 src/models/IntervalMarkovChain.jl create mode 
100644 src/models/IntervalMarkovDecisionProcess.jl create mode 100644 src/probabilities/Marginal.jl diff --git a/Project.toml b/Project.toml index bd6a0a81..98d0c149 100644 --- a/Project.toml +++ b/Project.toml @@ -16,8 +16,8 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" -[extensions] -IntervalMDPCudaExt = ["Adapt", "CUDA", "GPUArrays", "LLVM"] +# [extensions] +# IntervalMDPCudaExt = ["Adapt", "CUDA", "GPUArrays", "LLVM"] [compat] Adapt = "4" diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index d3ca11c2..cf6183ab 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -1,46 +1,3 @@ -function IntervalMDP.maxdiff(stateptr::CuVector{Int32}) - return reducediff(max, stateptr, typemin(Int32)) -end - -function IntervalMDP.mindiff(stateptr::CuVector{Int32}) - return reducediff(min, stateptr, typemax(Int32)) -end - -function reducediff(op, stateptr::CuVector{Int32}, neutral) - ret_arr = CuArray{Int32}(undef, 1) - kernel = @cuda launch = false reducediff_kernel!(op, stateptr, neutral, ret_arr) - - config = launch_configuration(kernel.fun) - max_threads = prevwarp(device(), config.threads) - wanted_threads = min(1024, nextwarp(device(), length(stateptr) - 1)) - - threads = min(max_threads, wanted_threads) - blocks = 1 - - kernel(op, stateptr, neutral, ret_arr; blocks = blocks, threads = threads) - - return CUDA.@allowscalar ret_arr[1] -end - -function reducediff_kernel!(op, stateptr, neutral, retarr) - diff = neutral - - i = threadIdx().x - @inbounds while i <= length(stateptr) - 1 - diff = op(diff, stateptr[i + 1] - stateptr[i]) - i += blockDim().x - end - - shuffle = Val(true) - diff = CUDA.reduce_block(op, diff, neutral, shuffle) - - if threadIdx().x == 1 - @inbounds retarr[1] = diff - end - - return -end - # This is type piracy - please port upstream to CUDA when FixedSparseCSC are stable. 
CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}(M::SparseArrays.FixedSparseCSC) where {Tv, Ti} = CuSparseMatrixCSC{Tv, Ti}( diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 1787d800..648d02e1 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -59,6 +59,6 @@ export RobustValueIteration include("robust_value_iteration.jl") ### Saving and loading models -include("Data/Data.jl") +# include("Data/Data.jl") end diff --git a/src/bellman.jl b/src/bellman.jl index beb616c4..72a78b67 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -63,6 +63,7 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) """ function bellman(V, model; upper_bound = false, maximize = true) Vres = similar(V, source_shape(model)) + return bellman!(Vres, V, model; upper_bound = upper_bound, maximize = maximize) end @@ -139,6 +140,7 @@ function bellman! end function bellman!(Vres, V, model; upper_bound = false, maximize = true) workspace = construct_workspace(model) strategy_cache = construct_strategy_cache(model) + return bellman!( workspace, strategy_cache, @@ -164,8 +166,7 @@ function bellman!( strategy_cache, Vres, V, - transition_prob(model), - stateptr(model); + model; upper_bound = upper_bound, maximize = maximize, ) @@ -191,7 +192,7 @@ function bellman!( # Select the value function for the current DFA state # according to the appropriate DFA transition function - map!(W, CartesianIndices(product_num_states(mp))) do idx + map!(W, CartesianIndices(state_variables(mp))) do idx return V[idx, dfa[state, lf[idx]]] end @@ -243,25 +244,25 @@ end # Non-threaded function _bellman_helper!( - workspace::Union{DenseWorkspace, SparseWorkspace}, + workspace::Union{DenseIntervalWorkspace, SparseIntervalWorkspace}, strategy_cache::AbstractStrategyCache, Vres, V, - prob::IntervalAmbiguitySet, - stateptr; + model; upper_bound = false, maximize = true, ) bellman_precomputation!(workspace, V, upper_bound) - for jₛ in 1:(length(stateptr) - 1) + marginal = marginals(model)[1] + + for jₛ in 
CartesianIndices(source_shape(marginal)) state_bellman!( workspace, strategy_cache, Vres, V, - prob, - stateptr, + marginal, jₛ, upper_bound, maximize, @@ -273,26 +274,26 @@ end # Threaded function _bellman_helper!( - workspace::Union{ThreadedDenseWorkspace, ThreadedSparseWorkspace}, + workspace::Union{ThreadedDenseIntervalWorkspace, ThreadedSparseIntervalWorkspace}, strategy_cache::AbstractStrategyCache, Vres, V, - prob::IntervalAmbiguitySet, - stateptr; + model; upper_bound = false, maximize = true, ) bellman_precomputation!(workspace, V, upper_bound) - @threadstid tid for jₛ in 1:(length(stateptr) - 1) + marginal = marginals(model)[1] + + @threadstid tid for jₛ in CartesianIndices(source_shape(marginal)) @inbounds ws = workspace[tid] state_bellman!( ws, strategy_cache, Vres, V, - prob, - stateptr, + marginal, jₛ, upper_bound, maximize, @@ -303,7 +304,7 @@ function _bellman_helper!( end function bellman_precomputation!( - workspace::Union{DenseWorkspace, ThreadedDenseWorkspace}, + workspace::Union{DenseIntervalWorkspace, ThreadedDenseIntervalWorkspace}, V, upper_bound, ) @@ -312,82 +313,78 @@ function bellman_precomputation!( end bellman_precomputation!( - workspace::Union{SparseWorkspace, ThreadedSparseWorkspace}, + workspace::Union{SparseIntervalWorkspace, ThreadedSparseIntervalWorkspace}, V, upper_bound, ) = nothing function state_bellman!( - workspace::Union{DenseWorkspace, SparseWorkspace}, + workspace::Union{DenseIntervalWorkspace, SparseIntervalWorkspace}, strategy_cache::OptimizingStrategyCache, Vres, V, - prob, - stateptr, + marginal, jₛ, upper_bound, maximize, ) @inbounds begin - s₁, s₂ = stateptr[jₛ], stateptr[jₛ + 1] - actions = @view workspace.actions[1:(s₂ - s₁)] - - for (i, jₐ) in enumerate(s₁:(s₂ - 1)) - actions[i] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) + for jₐ in CartesianIndices(action_shape(marginal)) + ambiguity_set = marginal[jₛ, jₐ] + budget = workspace.budget[sub2ind(marginal, jₛ, jₐ)] + workspace.actions[jₐ] = 
state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end - Vres[jₛ] = extract_strategy!(strategy_cache, actions, V, jₛ, maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) end end function state_bellman!( - workspace::Union{DenseWorkspace, SparseWorkspace}, + workspace::Union{DenseIntervalWorkspace, SparseIntervalWorkspace}, strategy_cache::NonOptimizingStrategyCache, Vres, V, - prob, - stateptr, + marginal, jₛ, upper_bound, maximize, ) @inbounds begin - s₁ = stateptr[jₛ] - jₐ = s₁ + strategy_cache[jₛ] - 1 - Vres[jₛ] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) + jₐ = strategy_cache[jₛ] + ambiguity_set = marginal[jₛ, jₐ] + budget = workspace.budget[sub2ind(marginal, jₛ, jₐ)] + Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end end Base.@propagate_inbounds function state_action_bellman( - workspace::DenseWorkspace, + workspace::DenseIntervalWorkspace, V, - prob, - jₐ, + ambiguity_set, + budget, upper_bound, ) - return dense_sorted_state_action_bellman(V, prob, jₐ, permutation(workspace)) + return dense_sorted_state_action_bellman(V, ambiguity_set, budget, permutation(workspace)) end -Base.@propagate_inbounds function dense_sorted_state_action_bellman(V, prob, jₐ, perm) - return dot(V, lower(prob, :, jₐ)) + - gap_value(V, gap(prob, :, jₐ), sum_lower(prob, jₐ), perm) +Base.@propagate_inbounds function dense_sorted_state_action_bellman(V, ambiguity_set, budget, perm) + return dot(V, lower(ambiguity_set)) + gap_value(V, gap(ambiguity_set), budget, perm) end Base.@propagate_inbounds function gap_value( V::AbstractVector{T}, gap::VR, - sum_lower, + budget, perm, ) where {T, VR <: AbstractVector} - remaining = one(T) - sum_lower res = zero(T) @inbounds for i in perm - p = min(remaining, gap[i]) + p = min(budget, gap[i]) res += p * V[i] - remaining -= p - if remaining <= zero(T) + budget -= p + if budget <= zero(T) break end end @@ -396,41 +393,36 @@ 
Base.@propagate_inbounds function gap_value( end Base.@propagate_inbounds function state_action_bellman( - workspace::SparseWorkspace, + workspace::SparseIntervalWorkspace, V, - prob, - jₐ, + ambiguity_set, + budget, upper_bound, ) - lowerⱼ = lower(prob, :, jₐ) - gapⱼ = gap(prob, :, jₐ) - used = sum_lower(prob)[jₐ] - - Vp_workspace = @view workspace.values_gaps[1:nnz(gapⱼ)] - for (i, (v, p)) in - enumerate(zip(@view(V[SparseArrays.nonzeroinds(gapⱼ)]), nonzeros(gapⱼ))) + Vp_workspace = @view workspace.values_gaps[1:nnz(ambiguity_set)] + Vnonzero = @view V[support(ambiguity_set)] + for (i, (v, p)) in enumerate(zip(Vnonzero, nonzeros(gap(ambiguity_set)))) Vp_workspace[i] = (v, p) end # rev=true for upper bound sort!(Vp_workspace; rev = upper_bound, by = first, scratch = scratch(workspace)) - return dot(V, lowerⱼ) + gap_value(Vp_workspace, used) + return dot(V, lower(ambiguity_set)) + gap_value(Vp_workspace, budget) end Base.@propagate_inbounds function gap_value( Vp::VP, - sum_lower, + budget, ) where {T, VP <: AbstractVector{<:Tuple{T, <:Real}}} - remaining = one(T) - sum_lower res = zero(T) for (V, p) in Vp - p = min(remaining, p) + p = min(budget, p) res += p * V - remaining -= p - if remaining <= zero(T) + budget -= p + if budget <= zero(T) break end end @@ -438,389 +430,389 @@ Base.@propagate_inbounds function gap_value( return res end -################################################################ -# Bellman operator for OrthogonalIntervalMarkovDecisionProcess # -################################################################ -function _bellman_helper!( - workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, - strategy_cache::AbstractStrategyCache, - Vres, - V, - prob, - stateptr; - upper_bound = false, - maximize = true, -) - bellman_precomputation!(workspace, V, prob, upper_bound) - - # For each source state - @inbounds for (jₛ_cart, jₛ_linear) in zip( - CartesianIndices(source_shape(prob)), - 
LinearIndices(source_shape(prob)), - ) - state_bellman!( - workspace, - strategy_cache, - Vres, - V, - prob, - stateptr, - jₛ_cart, - jₛ_linear; - upper_bound = upper_bound, - maximize = maximize, - ) - end - - return Vres -end - -function _bellman_helper!( - workspace::Union{ - ThreadedDenseOrthogonalWorkspace, - ThreadedSparseOrthogonalWorkspace, - ThreadedMixtureWorkspace, - }, - strategy_cache::AbstractStrategyCache, - Vres, - V, - prob, - stateptr; - upper_bound = false, - maximize = true, -) - bellman_precomputation!(workspace, V, prob, upper_bound) - - # For each source state - I_linear = LinearIndices(source_shape(prob)) - @threadstid tid for jₛ_cart in CartesianIndices(source_shape(prob)) - # We can't use @threadstid over a zip, so we need to manually index - jₛ_linear = I_linear[jₛ_cart] - - ws = workspace[tid] - - state_bellman!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr, - jₛ_cart, - jₛ_linear; - upper_bound = upper_bound, - maximize = maximize, - ) - end - - return Vres -end - -function bellman_precomputation!(workspace::DenseOrthogonalWorkspace, V, prob, upper_bound) - # Since sorting for the first level is shared among all higher levels, we can precompute it - product_nstates = num_target(prob) - - # For each higher-level state in the product space - for I in CartesianIndices(product_nstates[2:end]) - sort_dense_orthogonal(workspace, V, I, upper_bound) - end -end - -function bellman_precomputation!( - workspace::ThreadedDenseOrthogonalWorkspace, - V, - prob, - upper_bound, -) - # Since sorting for the first level is shared among all higher levels, we can precompute it - product_nstates = num_target(prob) - - # For each higher-level state in the product space - @threadstid tid for I in CartesianIndices(product_nstates[2:end]) - ws = workspace[tid] - sort_dense_orthogonal(ws, V, I, upper_bound) - end -end - -bellman_precomputation!( - workspace::Union{SparseOrthogonalWorkspace, ThreadedSparseOrthogonalWorkspace}, - V, - prob, - 
upper_bound, -) = nothing - -function sort_dense_orthogonal(workspace, V, I, upper_bound) - @inbounds begin - perm = @view workspace.permutation[axes(V, 1)] - sortperm!(perm, @view(V[:, I]); rev = upper_bound, scratch = workspace.scratch) - - copyto!(@view(first_level_perm(workspace)[:, I]), perm) - end -end - -function state_bellman!( - workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, - strategy_cache::OptimizingStrategyCache, - Vres, - V, - prob, - stateptr, - jₛ_cart, - jₛ_linear; - upper_bound, - maximize, -) - @inbounds begin - s₁, s₂ = stateptr[jₛ_linear], stateptr[jₛ_linear + 1] - act_vals = @view actions(workspace)[1:(s₂ - s₁)] - - for (i, jₐ) in enumerate(s₁:(s₂ - 1)) - act_vals[i] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) - end - - Vres[jₛ_cart] = extract_strategy!(strategy_cache, act_vals, V, jₛ_cart, maximize) - end -end - -function state_bellman!( - workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, - strategy_cache::NonOptimizingStrategyCache, - Vres, - V, - prob, - stateptr, - jₛ_cart, - jₛ_linear; - upper_bound, - maximize, -) - @inbounds begin - s₁ = stateptr[jₛ_linear] - jₐ = s₁ + strategy_cache[jₛ_cart] - 1 - Vres[jₛ_cart] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) - end -end - -Base.@propagate_inbounds function state_action_bellman( - workspace::DenseOrthogonalWorkspace, - V, - prob, - jₐ, - upper_bound, -) - # The only dimension - if ndims(prob) == 1 - return dense_sorted_state_action_bellman( - V, - prob[1], - jₐ, - first_level_perm(workspace), - ) - end - - Vₑ = workspace.expectation_cache - product_nstates = num_target(prob) - - # For each higher-level state in the product space - for I in CartesianIndices(product_nstates[2:end]) - - # For the first dimension, we need to copy the values from V - v = dense_sorted_state_action_bellman( - @view(V[:, I]), - prob[1], - jₐ, - # Use shared first level permutation across threads - 
@view(first_level_perm(workspace)[:, I]), - ) - Vₑ[1][I[1]] = v - - # For the remaining dimensions, if "full", compute expectation and store in the next level - for d in 2:(ndims(prob) - 1) - if I[d - 1] == product_nstates[d] - v = orthogonal_inner_bellman!( - workspace, - Vₑ[d - 1], - prob[d], - jₐ, - upper_bound, - ) - Vₑ[d][I[d]] = v - else - break - end - end - end - - # Last dimension - v = orthogonal_inner_bellman!(workspace, Vₑ[end], prob[end], jₐ, upper_bound) - - return v -end - -Base.@propagate_inbounds function orthogonal_inner_bellman!( - workspace, - V, - prob, - jₐ, - upper_bound::Bool, -) - perm = @view permutation(workspace)[1:length(V)] - - # rev=true for upper bound - sortperm!(perm, V; rev = upper_bound, scratch = scratch(workspace)) - - return dense_sorted_state_action_bellman(V, prob, jₐ, perm) -end - -Base.@propagate_inbounds function state_action_bellman( - workspace::SparseOrthogonalWorkspace, - V, - prob, - jₐ, - upper_bound, -) - # This function uses ntuple excessively to avoid allocations (list comprehension requires allocation, while ntuple does not) - nzinds_first = SparseArrays.nonzeroinds(gap(prob, 1, :, jₐ)) - nzinds_per_prob = - ntuple(i -> SparseArrays.nonzeroinds(gap(prob, i + 1, :, jₐ)), ndims(prob) - 1) - - lower_nzvals_per_prob = ntuple(i -> nonzeros(lower(prob, i, :, jₐ)), ndims(prob)) - gap_nzvals_per_prob = ntuple(i -> nonzeros(gap(prob, i, :, jₐ)), ndims(prob)) - sum_lower_per_prob = ntuple(i -> sum_lower(prob, i, jₐ), ndims(prob)) - - nnz_per_prob = ntuple(i -> nnz(gap(prob, i, :, jₐ)), ndims(prob)) - Vₑ = ntuple( - i -> @view(workspace.expectation_cache[i][1:nnz_per_prob[i + 1]]), - ndims(prob) - 1, - ) - - if ndims(prob) == 1 - # The only dimension - return orthogonal_sparse_inner_bellman!( - workspace, - @view(V[nzinds_first]), - lower_nzvals_per_prob[end], - gap_nzvals_per_prob[end], - sum_lower_per_prob[end], - upper_bound, - ) - end - - # For each higher-level state in the product space - for I in 
CartesianIndices(nnz_per_prob[2:end]) - Isparse = CartesianIndex(ntuple(d -> nzinds_per_prob[d][I[d]], ndims(prob) - 1)) - - # For the first dimension, we need to copy the values from V - v = orthogonal_sparse_inner_bellman!( - workspace, - @view(V[nzinds_first, Isparse]), - lower_nzvals_per_prob[1], - gap_nzvals_per_prob[1], - sum_lower_per_prob[1], - upper_bound, - ) - Vₑ[1][I[1]] = v - - # For the remaining dimensions, if "full", compute expectation and store in the next level - for d in 2:(ndims(prob) - 1) - if I[d - 1] == nnz_per_prob[d] - v = orthogonal_sparse_inner_bellman!( - workspace, - Vₑ[d - 1], - lower_nzvals_per_prob[d], - gap_nzvals_per_prob[d], - sum_lower_per_prob[d], - upper_bound, - ) - Vₑ[d][I[d]] = v - else - break - end - end - end - - # Last dimension - v = orthogonal_sparse_inner_bellman!( - workspace, - Vₑ[end], - lower_nzvals_per_prob[end], - gap_nzvals_per_prob[end], - sum_lower_per_prob[end], - upper_bound, - ) - - return v -end - -Base.@propagate_inbounds function orthogonal_sparse_inner_bellman!( - workspace::SparseOrthogonalWorkspace, - V, - lower, - gap, - sum_lower, - upper_bound::Bool, -) - Vp_workspace = @view workspace.values_gaps[1:length(gap)] - for (i, (v, p)) in enumerate(zip(V, gap)) - Vp_workspace[i] = (v, p) - end - - # rev=true for upper bound - sort!(Vp_workspace; rev = upper_bound, scratch = scratch(workspace)) - - return dot(V, lower) + gap_value(Vp_workspace, sum_lower) -end - -############################################################# -# Bellman operator for MixtureIntervalMarkovDecisionProcess # -############################################################# -bellman_precomputation!(workspace::MixtureWorkspace, V, prob, upper_bound) = - bellman_precomputation!(workspace.orthogonal_workspace, V, prob, upper_bound) - -function bellman_precomputation!( - workspace::ThreadedMixtureWorkspace{<:DenseOrthogonalWorkspace}, - V, - prob, - upper_bound, -) - # Since sorting for the first level is shared among all higher 
levels, we can precompute it - product_nstates = num_target(prob) - - # For each higher-level state in the product space - @threadstid tid for I in CartesianIndices(product_nstates[2:end]) - ws = workspace[tid] - sort_dense_orthogonal(ws.orthogonal_workspace, V, I, upper_bound) - end -end - -bellman_precomputation!( - workspace::ThreadedMixtureWorkspace{<:SparseOrthogonalWorkspace}, - V, - prob, - upper_bound, -) = nothing - -Base.@propagate_inbounds function state_action_bellman( - workspace::MixtureWorkspace, - V, - prob, - jₐ, - upper_bound, -) - # Value iteration for each model in the mixture (for source-action pair jₐ) - for (k, p) in enumerate(prob) - v = state_action_bellman(workspace.orthogonal_workspace, V, p, jₐ, upper_bound) - workspace.mixture_cache[k] = v - end - - # Combine mixture with weighting probabilities - v = orthogonal_inner_bellman!( - workspace, - workspace.mixture_cache, - weighting_probs(prob), - jₐ, - upper_bound, - ) - - return v -end +# ################################################################ +# # Bellman operator for OrthogonalIntervalMarkovDecisionProcess # +# ################################################################ +# function _bellman_helper!( +# workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, +# strategy_cache::AbstractStrategyCache, +# Vres, +# V, +# prob, +# stateptr; +# upper_bound = false, +# maximize = true, +# ) +# bellman_precomputation!(workspace, V, prob, upper_bound) + +# # For each source state +# @inbounds for (jₛ_cart, jₛ_linear) in zip( +# CartesianIndices(source_shape(prob)), +# LinearIndices(source_shape(prob)), +# ) +# state_bellman!( +# workspace, +# strategy_cache, +# Vres, +# V, +# prob, +# stateptr, +# jₛ_cart, +# jₛ_linear; +# upper_bound = upper_bound, +# maximize = maximize, +# ) +# end + +# return Vres +# end + +# function _bellman_helper!( +# workspace::Union{ +# ThreadedDenseOrthogonalWorkspace, +# ThreadedSparseOrthogonalWorkspace, +# 
ThreadedMixtureWorkspace, +# }, +# strategy_cache::AbstractStrategyCache, +# Vres, +# V, +# prob, +# stateptr; +# upper_bound = false, +# maximize = true, +# ) +# bellman_precomputation!(workspace, V, prob, upper_bound) + +# # For each source state +# I_linear = LinearIndices(source_shape(prob)) +# @threadstid tid for jₛ_cart in CartesianIndices(source_shape(prob)) +# # We can't use @threadstid over a zip, so we need to manually index +# jₛ_linear = I_linear[jₛ_cart] + +# ws = workspace[tid] + +# state_bellman!( +# ws, +# strategy_cache, +# Vres, +# V, +# prob, +# stateptr, +# jₛ_cart, +# jₛ_linear; +# upper_bound = upper_bound, +# maximize = maximize, +# ) +# end + +# return Vres +# end + +# function bellman_precomputation!(workspace::DenseOrthogonalWorkspace, V, prob, upper_bound) +# # Since sorting for the first level is shared among all higher levels, we can precompute it +# product_nstates = num_target(prob) + +# # For each higher-level state in the product space +# for I in CartesianIndices(product_nstates[2:end]) +# sort_dense_orthogonal(workspace, V, I, upper_bound) +# end +# end + +# function bellman_precomputation!( +# workspace::ThreadedDenseOrthogonalWorkspace, +# V, +# prob, +# upper_bound, +# ) +# # Since sorting for the first level is shared among all higher levels, we can precompute it +# product_nstates = num_target(prob) + +# # For each higher-level state in the product space +# @threadstid tid for I in CartesianIndices(product_nstates[2:end]) +# ws = workspace[tid] +# sort_dense_orthogonal(ws, V, I, upper_bound) +# end +# end + +# bellman_precomputation!( +# workspace::Union{SparseOrthogonalWorkspace, ThreadedSparseOrthogonalWorkspace}, +# V, +# prob, +# upper_bound, +# ) = nothing + +# function sort_dense_orthogonal(workspace, V, I, upper_bound) +# @inbounds begin +# perm = @view workspace.permutation[axes(V, 1)] +# sortperm!(perm, @view(V[:, I]); rev = upper_bound, scratch = workspace.scratch) + +# copyto!(@view(first_level_perm(workspace)[:, 
I]), perm) +# end +# end + +# function state_bellman!( +# workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, +# strategy_cache::OptimizingStrategyCache, +# Vres, +# V, +# prob, +# stateptr, +# jₛ_cart, +# jₛ_linear; +# upper_bound, +# maximize, +# ) +# @inbounds begin +# s₁, s₂ = stateptr[jₛ_linear], stateptr[jₛ_linear + 1] +# act_vals = @view actions(workspace)[1:(s₂ - s₁)] + +# for (i, jₐ) in enumerate(s₁:(s₂ - 1)) +# act_vals[i] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) +# end + +# Vres[jₛ_cart] = extract_strategy!(strategy_cache, act_vals, V, jₛ_cart, maximize) +# end +# end + +# function state_bellman!( +# workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, +# strategy_cache::NonOptimizingStrategyCache, +# Vres, +# V, +# prob, +# stateptr, +# jₛ_cart, +# jₛ_linear; +# upper_bound, +# maximize, +# ) +# @inbounds begin +# s₁ = stateptr[jₛ_linear] +# jₐ = s₁ + strategy_cache[jₛ_cart] - 1 +# Vres[jₛ_cart] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) +# end +# end + +# Base.@propagate_inbounds function state_action_bellman( +# workspace::DenseOrthogonalWorkspace, +# V, +# prob, +# jₐ, +# upper_bound, +# ) +# # The only dimension +# if ndims(prob) == 1 +# return dense_sorted_state_action_bellman( +# V, +# prob[1], +# jₐ, +# first_level_perm(workspace), +# ) +# end + +# Vₑ = workspace.expectation_cache +# product_nstates = num_target(prob) + +# # For each higher-level state in the product space +# for I in CartesianIndices(product_nstates[2:end]) + +# # For the first dimension, we need to copy the values from V +# v = dense_sorted_state_action_bellman( +# @view(V[:, I]), +# prob[1], +# jₐ, +# # Use shared first level permutation across threads +# @view(first_level_perm(workspace)[:, I]), +# ) +# Vₑ[1][I[1]] = v + +# # For the remaining dimensions, if "full", compute expectation and store in the next level +# for d in 2:(ndims(prob) - 1) +# if I[d - 1] == 
product_nstates[d] +# v = orthogonal_inner_bellman!( +# workspace, +# Vₑ[d - 1], +# prob[d], +# jₐ, +# upper_bound, +# ) +# Vₑ[d][I[d]] = v +# else +# break +# end +# end +# end + +# # Last dimension +# v = orthogonal_inner_bellman!(workspace, Vₑ[end], prob[end], jₐ, upper_bound) + +# return v +# end + +# Base.@propagate_inbounds function orthogonal_inner_bellman!( +# workspace, +# V, +# prob, +# jₐ, +# upper_bound::Bool, +# ) +# perm = @view permutation(workspace)[1:length(V)] + +# # rev=true for upper bound +# sortperm!(perm, V; rev = upper_bound, scratch = scratch(workspace)) + +# return dense_sorted_state_action_bellman(V, prob, jₐ, perm) +# end + +# Base.@propagate_inbounds function state_action_bellman( +# workspace::SparseOrthogonalWorkspace, +# V, +# prob, +# jₐ, +# upper_bound, +# ) +# # This function uses ntuple excessively to avoid allocations (list comprehension requires allocation, while ntuple does not) +# nzinds_first = SparseArrays.nonzeroinds(gap(prob, 1, :, jₐ)) +# nzinds_per_prob = +# ntuple(i -> SparseArrays.nonzeroinds(gap(prob, i + 1, :, jₐ)), ndims(prob) - 1) + +# lower_nzvals_per_prob = ntuple(i -> nonzeros(lower(prob, i, :, jₐ)), ndims(prob)) +# gap_nzvals_per_prob = ntuple(i -> nonzeros(gap(prob, i, :, jₐ)), ndims(prob)) +# sum_lower_per_prob = ntuple(i -> sum_lower(prob, i, jₐ), ndims(prob)) + +# nnz_per_prob = ntuple(i -> nnz(gap(prob, i, :, jₐ)), ndims(prob)) +# Vₑ = ntuple( +# i -> @view(workspace.expectation_cache[i][1:nnz_per_prob[i + 1]]), +# ndims(prob) - 1, +# ) + +# if ndims(prob) == 1 +# # The only dimension +# return orthogonal_sparse_inner_bellman!( +# workspace, +# @view(V[nzinds_first]), +# lower_nzvals_per_prob[end], +# gap_nzvals_per_prob[end], +# sum_lower_per_prob[end], +# upper_bound, +# ) +# end + +# # For each higher-level state in the product space +# for I in CartesianIndices(nnz_per_prob[2:end]) +# Isparse = CartesianIndex(ntuple(d -> nzinds_per_prob[d][I[d]], ndims(prob) - 1)) + +# # For the first dimension, we 
need to copy the values from V +# v = orthogonal_sparse_inner_bellman!( +# workspace, +# @view(V[nzinds_first, Isparse]), +# lower_nzvals_per_prob[1], +# gap_nzvals_per_prob[1], +# sum_lower_per_prob[1], +# upper_bound, +# ) +# Vₑ[1][I[1]] = v + +# # For the remaining dimensions, if "full", compute expectation and store in the next level +# for d in 2:(ndims(prob) - 1) +# if I[d - 1] == nnz_per_prob[d] +# v = orthogonal_sparse_inner_bellman!( +# workspace, +# Vₑ[d - 1], +# lower_nzvals_per_prob[d], +# gap_nzvals_per_prob[d], +# sum_lower_per_prob[d], +# upper_bound, +# ) +# Vₑ[d][I[d]] = v +# else +# break +# end +# end +# end + +# # Last dimension +# v = orthogonal_sparse_inner_bellman!( +# workspace, +# Vₑ[end], +# lower_nzvals_per_prob[end], +# gap_nzvals_per_prob[end], +# sum_lower_per_prob[end], +# upper_bound, +# ) + +# return v +# end + +# Base.@propagate_inbounds function orthogonal_sparse_inner_bellman!( +# workspace::SparseOrthogonalWorkspace, +# V, +# lower, +# gap, +# sum_lower, +# upper_bound::Bool, +# ) +# Vp_workspace = @view workspace.values_gaps[1:length(gap)] +# for (i, (v, p)) in enumerate(zip(V, gap)) +# Vp_workspace[i] = (v, p) +# end + +# # rev=true for upper bound +# sort!(Vp_workspace; rev = upper_bound, scratch = scratch(workspace)) + +# return dot(V, lower) + gap_value(Vp_workspace, sum_lower) +# end + +# ############################################################# +# # Bellman operator for MixtureIntervalMarkovDecisionProcess # +# ############################################################# +# bellman_precomputation!(workspace::MixtureWorkspace, V, prob, upper_bound) = +# bellman_precomputation!(workspace.orthogonal_workspace, V, prob, upper_bound) + +# function bellman_precomputation!( +# workspace::ThreadedMixtureWorkspace{<:DenseOrthogonalWorkspace}, +# V, +# prob, +# upper_bound, +# ) +# # Since sorting for the first level is shared among all higher levels, we can precompute it +# product_nstates = num_target(prob) + +# # For each 
higher-level state in the product space +# @threadstid tid for I in CartesianIndices(product_nstates[2:end]) +# ws = workspace[tid] +# sort_dense_orthogonal(ws.orthogonal_workspace, V, I, upper_bound) +# end +# end + +# bellman_precomputation!( +# workspace::ThreadedMixtureWorkspace{<:SparseOrthogonalWorkspace}, +# V, +# prob, +# upper_bound, +# ) = nothing + +# Base.@propagate_inbounds function state_action_bellman( +# workspace::MixtureWorkspace, +# V, +# prob, +# jₐ, +# upper_bound, +# ) +# # Value iteration for each model in the mixture (for source-action pair jₐ) +# for (k, p) in enumerate(prob) +# v = state_action_bellman(workspace.orthogonal_workspace, V, p, jₐ, upper_bound) +# workspace.mixture_cache[k] = v +# end + +# # Combine mixture with weighting probabilities +# v = orthogonal_inner_bellman!( +# workspace, +# workspace.mixture_cache, +# weighting_probs(prob), +# jₐ, +# upper_bound, +# ) + +# return v +# end diff --git a/src/cuda.jl b/src/cuda.jl index e848591c..4fc4c131 100644 --- a/src/cuda.jl +++ b/src/cuda.jl @@ -27,23 +27,19 @@ function Base.showerror(io::IO, e::OutOfSharedMemory) ) end -function checkdevice(v::AbstractArray, system::IntervalMarkovProcess) - checkdevice(v, transition_prob(system)) +function checkdevice(v::AbstractArray, system::FactoredRMDP) + for marginal in system.transition + checkdevice(v, marginal) + end end -function checkdevice(v::AbstractArray, p::IntervalAmbiguitySet) - # Lower and gap are required to be the same type. - checkdevice(v, lower(p)) +function checkdevice(v::AbstractArray, marginal::Marginal) + checkdevice(v, marginal.ambiguity_sets) end -function checkdevice(v::AbstractArray, p::OrthogonalIntervalProbabilities) - # All axes of p are required to be the same type. - checkdevice(v, first(pᵢ)) -end - -function checkdevice(v::AbstractArray, p::MixtureIntervalProbabilities) - # All mixtures (and weighting_probs) of p are required to be the same type. 
# Verify that the value array `v` resides on the same device (CPU/GPU backend)
# as the ambiguity sets' probability data. `lower` and `gap` are constrained to
# share a storage type, so checking `lower` alone is sufficient.
function checkdevice(v::AbstractArray, sets::IntervalAmbiguitySets)
    checkdevice(v, sets.lower)
end
"""
    FactoredRMDP(state_vars, action_vars, transition, initial_states = AllStates())

Convenience constructor for the case where the transition marginals are defined
over the full state space, i.e. `source_dims == state_vars`. Accepts any
integer tuples and normalizes them to `Int32` before delegating to the checked
inner constructor.
"""
function FactoredRMDP(
    state_vars::NTuple{N, <:Integer},
    action_vars::NTuple{M, <:Integer},
    transition::P,
    initial_states::VI = AllStates(),
) where {N, M, P <: NTuple{N, <:AbstractMarginal}, VI <: InitialStates}
    state_vars_32 = Int32.(state_vars)

    # Bug fix: the previous implementation invoked
    # `FactoredRobustMarkovDecisionProcess{N, M, P, VI}(...)` with explicit type
    # parameters. Defining an inner constructor removes the default
    # parameterized constructors, so that call raises a MethodError; it also
    # bypassed the Int32 conversion required by the inner constructor's
    # `NTuple{N, Int32}` signature.
    return FactoredRobustMarkovDecisionProcess(
        state_vars_32,
        Int32.(action_vars),
        state_vars_32, # source_dims defaults to the full state space
        transition,
        initial_states,
    )
end

# Validate all components of a factored RMDP; throws on any inconsistency.
function check_rmdp(state_vars, action_vars, source_dims, transition, initial_states)
    check_state_variables(state_vars, source_dims)
    check_action_variables(action_vars)
    check_transition(state_vars, action_vars, source_dims, transition)
    return check_initial_states(state_vars, initial_states)
end

# Each state variable must have a positive number of states, and each source
# dimension must lie in `1:state_vars[i]`.
function check_state_variables(state_vars, source_dims)
    if any(n -> n <= 0, state_vars)
        throw(ArgumentError("All state variables must be positive integers."))
    end

    if any(
        i -> source_dims[i] <= 0 || source_dims[i] > state_vars[i],
        eachindex(state_vars),
    )
        throw(ArgumentError("All source dimensions must be positive integers and less than or equal to the corresponding state variable."))
    end
end
Expected $(state_dims[i]), got $(num_target(marginal)).")) end - if source_shape(marginal) != state_dims[state_variables(marginal)] - throw(DimensionMismatch("Marginal $i has incorrect source shape. Expected $state_dims, got $(source_shape(marginal)).")) + if source_shape(marginal) != getindex.(Tuple(source_dims), state_variables(marginal)) # source_dims[state_variables(marginal)] + throw(DimensionMismatch("Marginal $i has incorrect source shape. Expected $source_dims, got $(source_shape(marginal)).")) end - if action_shape(marginal) != action_dims[action_variables(marginal)] + if action_shape(marginal) != getindex.(Tuple(action_dims), action_variables(marginal)) throw(DimensionMismatch("Marginal $i has incorrect action shape. Expected $action_dims, got $(action_shape(marginal)).")) end end @@ -79,5 +110,13 @@ state_variables(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.state_vars action_variables(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.action_vars num_states(rmdp::FactoredRobustMarkovDecisionProcess) = prod(state_variables(rmdp)) num_actions(rmdp::FactoredRobustMarkovDecisionProcess) = prod(action_variables(rmdp)) +marginals(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.transition +initial_states(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.initial_states + +source_shape(m::FactoredRobustMarkovDecisionProcess) = m.source_dims +action_shape(m::FactoredRobustMarkovDecisionProcess) = m.action_vars -source_shape(m::FactoredRobustMarkovDecisionProcess) = m.state_vars \ No newline at end of file +sub2ind(rmdp::FactoredRMDP, r, jₛ, jₐ) = sub2ind(rmdp.transition[r], jₛ, jₐ) +function Base.getindex(rmdp::FactoredRMDP, r::Int, jₛ, jₐ) + return rmdp.transition[r][jₛ, jₐ] +end \ No newline at end of file diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl new file mode 100644 index 00000000..3706a27a --- /dev/null +++ b/src/models/IntervalMarkovChain.jl @@ -0,0 +1,17 @@ +function 
"""
    IntervalMarkovDecisionProcess(states, actions, transition_intervals, rewards)

Construct an IMDP from explicit per-transition probability intervals.

`transition_intervals` maps `(source, action, destination)` to `(p_min, p_max)`;
`rewards` maps `(source, action)` to an immediate reward.

Throws `ArgumentError` if any interval is malformed, or if for some
source/action pair the bounds cannot contain a probability distribution
(sum of lower bounds > 1, or sum of upper bounds < 1).
"""
function IntervalMarkovDecisionProcess(
    states::Vector{S},
    actions::Vector{A},
    transition_intervals::Dict{Tuple{S, A, S}, Tuple{Float64, Float64}},
    rewards::Dict{Tuple{S, A}, Float64},
) where {S, A}
    # Per-interval validation. Uses ArgumentError instead of @assert so the
    # check cannot be disabled at higher optimization levels.
    for ((s, a, s_next), (p_min, p_max)) in transition_intervals
        if !(0.0 <= p_min <= p_max <= 1.0)
            throw(
                ArgumentError(
                    "Transition probability interval for ($s, $a, $s_next) must satisfy 0 <= p_min <= p_max <= 1, got ($p_min, $p_max)",
                ),
            )
        end
    end

    # Accumulate per-(state, action) bound sums in a single pass instead of
    # rescanning the whole dictionary for every pair (was O(|S|·|A|·|T|)).
    total_min = Dict{Tuple{S, A}, Float64}()
    total_max = Dict{Tuple{S, A}, Float64}()
    for ((s, a, _), (p_min, p_max)) in transition_intervals
        key = (s, a)
        total_min[key] = get(total_min, key, 0.0) + p_min
        total_max[key] = get(total_max, key, 0.0) + p_max
    end

    for s in states, a in actions
        # `get` with default 0.0 also fixes a crash: `sum` over an empty
        # generator (a pair with no outgoing transitions) throws.
        tmin = get(total_min, (s, a), 0.0)
        tmax = get(total_max, (s, a), 0.0)

        if tmin > 1.0
            throw(ArgumentError("Total minimum transition probability from state $s with action $a exceeds 1"))
        end
        # Bug fix: for the intervals to admit a probability distribution the
        # upper bounds must sum to AT LEAST 1. The previous check required
        # `total_max <= 1`, which rejects well-formed IMDPs; this mirrors the
        # per-column check in `checkprobabilities`.
        if tmax < 1.0
            throw(ArgumentError("Total maximum transition probability from state $s with action $a is below 1"))
        end
    end

    return IntervalMarkovDecisionProcess{S, A}(states, actions, transition_intervals, rewards)
end
initial_states(mp::IntervalMarkovProcess) = mp.initial_states A type to represent all states in a Markov process. This type is used to specify all states as the initial states. """ struct AllStates end -const InitialStates = Union{AllStates, AbstractVector} +const InitialStates = Union{AllStates, <:AbstractVector} """ num_states(mp::IntervalMarkovProcess) Return the number of states. """ -num_states(mp::IntervalMarkovProcess) = mp.num_states +function num_states end """ - transition_prob(mp::IntervalMarkovProcess) + num_actions(mp::IntervalMarkovProcess) -Return the interval on transition probabilities. +Return the number of actions. """ -transition_prob(mp::IntervalMarkovProcess) = mp.transition_prob +function num_actions end diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index 6e8471b8..2fc77693 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -94,7 +94,7 @@ Return the labelling function of the product """ labelling_function(proc::ProductProcess) = proc.labelling_func -product_num_states(proc::ProductProcess) = - (product_num_states(markov_process(proc))..., num_states(automaton(proc))) -source_shape(proc::ProductProcess) = - (source_shape(markov_process(proc))..., num_states(automaton(proc))) +state_variables(proc::ProductProcess) = (state_variables(markov_process(proc))..., num_states(automaton(proc))) +source_shape(proc::ProductProcess) = (source_shape(markov_process(proc))..., num_states(automaton(proc))) +action_variables(proc::ProductProcess) = action_variables(markov_process(proc)) +action_shape(proc::ProductProcess) = action_shape(markov_process(proc)) \ No newline at end of file diff --git a/src/models/models.jl b/src/models/models.jl index e5a78432..8fd80ff5 100644 --- a/src/models/models.jl +++ b/src/models/models.jl @@ -5,9 +5,11 @@ export IntervalMarkovProcess, AllStates export num_states, num_actions, initial_states include("FactoredRobustMarkovDecisionProcess.jl") -const FactoredRMDP = 
FactoredRobustMarkovDecisionProcess export FactoredRobustMarkovDecisionProcess, state_variables, action_variables +include("IntervalMarkovChain.jl") +export IntervalMarkovChain + include("DeterministicAutomaton.jl") include("DFA.jl") diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 85f51267..588b9be3 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -38,76 +38,17 @@ sparse_prob = IntervalAmbiguitySets(; [1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. """ -struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}, N, M, I <: LinearIndices} <: AbstractMarginal +struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} <: AbstractAmbiguitySets lower::MR gap::MR - state_indices::NTuple{N, Int32} - action_indices::NTuple{M, Int32} + function IntervalAmbiguitySets(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} + checkprobabilities(lower, gap) - source_dims::NTuple{N, Int32} - action_dims::NTuple{M, Int32} - linear_index::I - - function IntervalAmbiguitySets(lower::MR, gap::MR, state_indices, action_indices, source_dims, action_dims) where {R, MR <: AbstractMatrix{R}} - checkprobabilities!(lower, gap) - - linear_index = LinearIndices((source_dims..., action_dims...)) - return IntervalAmbiguitySets(lower, gap, state_indices, action_indices, source_dims, action_dims, linear_index) + return new{R, MR}(lower, gap) end end -# Constructor for upper and lower bounds -# Constructor if no state/action indices are given (i.e. only one state and one action variable) - -function checkprobabilities!(lower::AbstractMatrix, gap::AbstractMatrix) - @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." 
- @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." - @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." - - sum_lower = vec(sum(lower; dims = 1)) - max_lower_bound = maximum(sum_lower) - @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." - - sum_upper = vec(sum(lower + gap; dims = 1)) - max_upper_bound = minimum(sum_upper) - @assert max_upper_bound >= 1 "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1." -end - -function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) - @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative." - @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative." - @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." - - sum_lower = vec(sum(lower; dims = 1)) - max_lower_bound = maximum(sum_lower) - @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." - - sum_upper = vec(sum(lower + gap; dims = 1)) - max_upper_bound = minimum(sum_upper) - @assert max_upper_bound >= 1 "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1." -end - -function checkindices( - state_indices::NTuple{N, Int32}, - action_indices::NTuple{M, Int32}, - source_dims::NTuple{N, Int32}, - action_dims::NTuple{M, Int32}, -) where {N, M} - # TODO: More checks - @assert all(state_indices .> 0) "State indices must be positive." - @assert all(action_indices .> 0) "Action indices must be positive." 
- - @assert length(state_indices) == length(source_dims) "Length of state indices must match length of source dimensions." - @assert length(action_indices) == length(action_dims) "Length of action indices must match length of action dimensions." - - total_source = prod(source_dims) - total_action = prod(action_dims) - - @assert all(state_indices .<= total_source) "State indices must not exceed total number of source states ($total_source)." - @assert all(action_indices .<= total_action) "Action indices must not exceed total number of actions ($total_action)." -end - # Keyword constructor from lower and upper function IntervalAmbiguitySets(; lower::MR, upper::MR) where {MR <: AbstractMatrix} lower, gap = compute_gap(lower, upper) @@ -148,24 +89,40 @@ function compute_gap( return lower, gap end -state_variables(p::IntervalAmbiguitySets) = p.source_indices -action_variables(p::IntervalAmbiguitySets) = p.action_indices -source_shape(p::IntervalAmbiguitySets) = p.source_dims -action_shape(p::IntervalAmbiguitySets) = p.action_dims -num_target(p::IntervalAmbiguitySets) = size(p.lower, 1) +function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) + @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." + @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." + @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." -struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}} - lower::VR - gap::VR + sum_lower = vec(sum(lower; dims = 1)) + max_lower_bound = maximum(sum_lower) + @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." 
"""
    checkprobabilities(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix)

Validate interval ambiguity sets stored column-wise: stored entries must be
non-negative, `lower + gap` must be entrywise at most 1, each column's lower
bounds must sum to at most 1, and each column's upper bounds (`lower + gap`)
must sum to at least 1.

Sparse specialization operating on the stored nonzeros. Bug fix: this method
was named `checkprobabilities!` although it mutates nothing, while the
`IntervalAmbiguitySets` inner constructor calls `checkprobabilities` — the
sparse specialization was therefore unreachable and sparse inputs fell through
to the dense method. Renamed so dispatch selects it.
"""
function checkprobabilities(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix)
    # NOTE(review): the entrywise check below assumes `lower` and `gap` share
    # a sparsity pattern — confirm upstream construction guarantees this.
    @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative."
    @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative."
    @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1."

    sum_lower = vec(sum(lower; dims = 1))
    max_lower_bound = maximum(sum_lower)
    @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1."

    sum_upper = vec(sum(lower + gap; dims = 1))
    # Renamed local: this is the *minimum* column sum of the upper bounds; the
    # previous name `max_upper_bound` was misleading.
    min_upper_bound = minimum(sum_upper)
    @assert min_upper_bound >= 1 "The joint upper bound transition probability per column (min is $min_upper_bound) should be greater than or equal to 1."
end
# An `IntervalAmbiguitySets` behaves as a marginal over a single source
# variable with a single implicit action: the linear index is the source index.
sub2ind(::IntervalAmbiguitySets, jₛ, jₐ) = jₛ

# Select the ambiguity set for source `jₛ`; the action index is ignored since
# the sets are indexed by column only.
# Bug fix: the previous version materialized two column views into locals that
# were never used before delegating; the dead work is removed.
Base.getindex(p::IntervalAmbiguitySets, jₛ, jₐ) = p[jₛ]

# Iteration yields each column's ambiguity set in order. Merging the two
# `iterate` methods via a default state also makes iteration of an empty
# collection terminate instead of erroring on `p[1]`.
function Base.iterate(p::IntervalAmbiguitySets, state = 1)
    return state > num_sets(p) ? nothing : (p[state], state + 1)
end
Base.length(p::IntervalAmbiguitySets) = num_sets(p)

"""
    IntervalAmbiguitySet

A single interval ambiguity set: the probability of transitioning to
destination `i` is bounded by `lower[i]` and `lower[i] + gap[i]`.
"""
struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}}
    lower::VR
    gap::VR
end

lower(p::IntervalAmbiguitySet) = p.lower
lower(p::IntervalAmbiguitySet, destination) = p.lower[destination]

gap(p::IntervalAmbiguitySet) = p.gap
gap(p::IntervalAmbiguitySet, destination) = p.gap[destination]

# View of a dense matrix column: every destination is in the support.
const ColumnView{Tv} = SubArray{Tv, 1, <:AbstractMatrix{Tv}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}}
support(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = eachindex(p.gap)

# View of a sparse (CSC) matrix column: the support is the stored row indices.
const SparseColumnView{Tv, Ti} = SubArray{Tv, 1, <:SparseArrays.AbstractSparseMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}}
support(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = rowvals(p.gap)
SparseArrays.nnz(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap)

"""
    SARectangularMarginal

A marginal transition function that is (s, a)-rectangular: for every
(source, action) pair a separate ambiguity set is selected. The marginal may
depend on a subset of the state/action variables, selected by `state_indices`
and `action_indices`; `linear_index` flattens the selected coordinates into a
column index of the underlying ambiguity sets.
"""
struct SARectangularMarginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndices} <: AbstractMarginal
    ambiguity_sets::A

    state_indices::NTuple{N, Int32}
    action_indices::NTuple{M, Int32}

    source_dims::NTuple{N, Int32}
    action_vars::NTuple{M, Int32}
    linear_index::I

    function SARectangularMarginal(
        ambiguity_sets::A,
        state_indices::NTuple{N, Int32},
        action_indices::NTuple{M, Int32},
        source_dims::NTuple{N, Int32},
        action_vars::NTuple{M, Int32},
    ) where {A <: AbstractAmbiguitySets, N, M}
        checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars)

        linear_index = LinearIndices((source_dims..., action_vars...))
        return new{A, N, M, typeof(linear_index)}(
            ambiguity_sets,
            state_indices,
            action_indices,
            source_dims,
            action_vars,
            linear_index,
        )
    end
end
const Marginal = SARectangularMarginal

# Int-accepting convenience constructor; normalizes index tuples to Int32.
function Marginal(
    ambiguity_sets::A,
    state_indices::NTuple{N, Int},
    action_indices::NTuple{M, Int},
    source_dims::NTuple{N, Int},
    action_vars::NTuple{M, Int},
) where {A <: AbstractAmbiguitySets, N, M}
    return SARectangularMarginal(
        ambiguity_sets,
        Int32.(state_indices),
        Int32.(action_indices),
        Int32.(source_dims),
        Int32.(action_vars),
    )
end
# --- SARectangularMarginal accessors -----------------------------------------

ambiguity_sets(m::Marginal) = m.ambiguity_sets
state_variables(m::Marginal) = m.state_indices
action_variables(m::Marginal) = m.action_indices
source_shape(m::Marginal) = m.source_dims
action_shape(m::Marginal) = m.action_vars
num_target(m::Marginal) = num_target(ambiguity_sets(m))

# Index the marginal by (source, action), flattened through the marginal's
# linear index into a column of the underlying ambiguity sets.
Base.getindex(m::Marginal, source, action) =
    ambiguity_sets(m)[sub2ind(m, source, action)]

sub2ind(m::Marginal, source::CartesianIndex, action::CartesianIndex) =
    sub2ind(m, Tuple(source), Tuple(action))

# Project the full (source, action) coordinates onto the variables this
# marginal depends on, then flatten to a linear column index.
function sub2ind(
    m::Marginal,
    source::NTuple{N, <:Integer},
    action::NTuple{M, <:Integer},
) where {N, M}
    src = getindex.(source, m.state_indices)
    act = getindex.(action, m.action_indices)
    return m.linear_index[src..., act...]
end
+""" +function support end +export support include("IntervalAmbiguitySets.jl") -export IntervalAmbiguitySets +export IntervalAmbiguitySets, lower, upper, gap + +abstract type AbstractMarginal end +include("Marginal.jl") +export SARectangularMarginal, Marginal, ambiguity_sets, state_variables, action_variables, source_shape, action_shape, num_target include("TransitionFunction.jl") export TransitionFunction, transition diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index f350dbff..c38e332b 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -161,7 +161,7 @@ end function ValueFunction(problem::AbstractIntervalMDPProblem) mp = system(problem) - previous = arrayfactory(mp, valuetype(mp), product_num_states(mp)) + previous = arrayfactory(mp, valuetype(mp), state_variables(mp)) current = copy(previous) return ValueFunction(previous, current) diff --git a/src/specification.jl b/src/specification.jl index 1a8b240d..893f1943 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -667,7 +667,7 @@ Return the set of states to avoid. 
# Construct a stationary (time-invariant) strategy cache: one action tuple per
# source state in the factored source space.
function construct_strategy_cache(
    problem::ControlSynthesisProblem,
    time_varying::Val{false},
)
    mp = system(problem)
    N = length(action_variables(mp))

    # Bug fix: the allocated array was bound to `cur_strategy` while the cache
    # was constructed from the unbound name `strategy`, raising UndefVarError
    # at runtime. The binding now matches the name that is returned.
    strategy = arrayfactory(mp, NTuple{N, Int32}, source_shape(mp))

    return StationaryStrategyCache(strategy)
end
# --- Array allocation & element-type helpers ---------------------------------
# `arrayfactory(x, T, dims)` allocates a zeroed array with element type `T` and
# size `dims`, matching the storage backend of `x` (e.g. CPU vs. GPU) by
# recursing down to the underlying probability data.

arrayfactory(mp::ProductProcess, T, num_states) =
    arrayfactory(markov_process(mp), T, num_states)
arrayfactory(mp::FactoredRMDP, T, num_states) =
    arrayfactory(first(mp.transition), T, num_states)
arrayfactory(marginal::Marginal, T, num_states) =
    arrayfactory(marginal.ambiguity_sets, T, num_states)
arrayfactory(sets::IntervalAmbiguitySets, T, num_states) =
    arrayfactory(sets.gap, T, num_states)
arrayfactory(::MR, T, num_states) where {MR <: AbstractArray} = zeros(T, num_states)

# `valuetype(x)` is the numeric element type used for value functions, taken
# from the underlying probability data.
valuetype(mp::ProductProcess) = valuetype(markov_process(mp))
valuetype(mp::FactoredRMDP) = valuetype(first(mp.transition))
valuetype(marginal::Marginal) = valuetype(marginal.ambiguity_sets)
valuetype(sets::IntervalAmbiguitySets) = valuetype(sets.gap)
valuetype(::MR) where {R, MR <: AbstractArray{R}} = R
intermediate_values::MT -end - """ - construct_workspace(proc::ProductProcess) + construct_workspace(sys::StochasticProcess) Construct a workspace for computing the Bellman update, given a value function. If the Bellman update is used in a hot-loop, it is more efficient to use this function to preallocate the workspace and reuse across iterations. -The underlying workspace type is determined by the type and size of the transition probability matrix, +The workspace type is determined by the system type, the type (including device) and size of the ambiguity sets, as well as the number of threads available. """ +function construct_workspace end + +struct ProductWorkspace{W, MT <: AbstractArray} + underlying_workspace::W + intermediate_values::MT +end + function construct_workspace(proc::ProductProcess) mp = markov_process(proc) underlying_workspace = construct_workspace(mp) - intermediate_values = arrayfactory(mp, valuetype(mp), product_num_states(mp)) + intermediate_values = arrayfactory(mp, valuetype(mp), state_variables(mp)) return ProductWorkspace(underlying_workspace, intermediate_values) end -""" - construct_workspace(mp::IntervalMarkovProcess) - -Construct a workspace for computing the Bellman update, given a value function. -If the Bellman update is used in a hot-loop, it is more efficient to use this function -to preallocate the workspace and reuse across iterations. - -The workspace type is determined by the type and size of the transition probability matrix, -as well as the number of threads available. 
# Workspace for Bellman updates over dense interval ambiguity sets.
#
# Fields:
# - `budget`: per-column slack `1 - sum(lower[:, j])` available to distribute
#   among the gaps during the sorting-based optimization.
# - `scratch`, `permutation`: Int32 buffers sized to the number of target
#   states, reused by the sorting step.
# - `actions`: buffer holding one value per action.
struct DenseIntervalWorkspace{T <: Real}
    budget::Vector{T}
    scratch::Vector{Int32}
    permutation::Vector{Int32}
    actions::Vector{T}
end

function DenseIntervalWorkspace(
    ambiguity_set::IntervalAmbiguitySets{R},
    nactions,
) where {R <: Real}
    ntargets = num_target(ambiguity_set)
    budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1))
    return DenseIntervalWorkspace(
        budget,
        Vector{Int32}(undef, ntargets),
        Vector{Int32}(undef, ntargets),
        Vector{R}(undef, nactions),
    )
end

permutation(ws::DenseIntervalWorkspace) = ws.permutation
scratch(ws::DenseIntervalWorkspace) = ws.scratch

# One `DenseIntervalWorkspace` per thread.
# NOTE(review): the threaded constructor shares `budget`, `scratch`, and
# `permutation` across all per-thread workspaces — safe for the read-only
# `budget`, but confirm the mutable scratch/permutation buffers are never
# written concurrently by multiple threads.
struct ThreadedDenseIntervalWorkspace{T <: Real}
    thread_workspaces::Vector{DenseIntervalWorkspace{T}}
end
ThreadedDenseWorkspace(workspaces) + return ThreadedDenseIntervalWorkspace(workspaces) end -Base.getindex(ws::ThreadedDenseWorkspace, i) = ws.thread_workspaces[i] +Base.getindex(ws::ThreadedDenseIntervalWorkspace, i) = ws.thread_workspaces[i] ## permutation and scratch space is shared across threads -permutation(ws::ThreadedDenseWorkspace) = permutation(first(ws.thread_workspaces)) -scratch(ws::ThreadedDenseWorkspace) = scratch(first(ws.thread_workspaces)) - -""" - construct_workspace(prob::IntervalProbabilities) +permutation(ws::ThreadedDenseIntervalWorkspace) = permutation(first(ws.thread_workspaces)) +scratch(ws::ThreadedDenseIntervalWorkspace) = scratch(first(ws.thread_workspaces)) -Construct a workspace for computing the Bellman update, given a value function. -If the Bellman update is used in a hot-loop, it is more efficient to use this function -to preallocate the workspace and reuse across iterations. - -The workspace type is determined by the type and size of the transition probability matrix, -as well as the number of threads available. 
-""" function construct_workspace( - prob::IntervalAmbiguitySet{R, VR, MR}, - max_actions = 1; + prob::IntervalAmbiguitySets{R, MR}; threshold = 10, -) where {R, VR, MR <: AbstractMatrix{R}} - if Threads.nthreads() == 1 || size(gap(prob), 2) <= threshold - return DenseWorkspace(gap(prob), max_actions) +) where {R, MR <: AbstractMatrix{R}} + if Threads.nthreads() == 1 || num_sets(prob) <= threshold + return DenseIntervalWorkspace(prob, 1) else - return ThreadedDenseWorkspace(gap(prob), max_actions) + return ThreadedDenseIntervalWorkspace(prob, 1) end end -# Sparse -struct SparseWorkspace{T <: Real} - scratch::Vector{Tuple{T, T}} - values_gaps::Vector{Tuple{T, T}} - actions::Vector{T} -end - -function SparseWorkspace(p::AbstractSparseMatrix{T}, max_actions) where {T <: Real} - max_nonzeros = maximum(map(nnz, eachcol(p))) - scratch = Vector{Tuple{T, T}}(undef, max_nonzeros) - values_gaps = Vector{Tuple{T, T}}(undef, max_nonzeros) - actions = Vector{T}(undef, max_actions) - return SparseWorkspace(scratch, values_gaps, actions) -end - -scratch(ws::SparseWorkspace) = ws.scratch - -struct ThreadedSparseWorkspace{T} - thread_workspaces::Vector{SparseWorkspace{T}} -end - -function ThreadedSparseWorkspace(p::AbstractSparseMatrix, max_actions) - nthreads = Threads.nthreads() - thread_workspaces = [SparseWorkspace(p, max_actions) for _ in 1:nthreads] - return ThreadedSparseWorkspace(thread_workspaces) -end - -Base.getindex(ws::ThreadedSparseWorkspace, i) = ws.thread_workspaces[i] - function construct_workspace( - prob::IntervalAmbiguitySet{R, VR, MR}, - max_actions = 1; + sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}; threshold = 10, -) where {R, VR, MR <: AbstractSparseMatrix{R}} - if Threads.nthreads() == 1 || size(gap(prob), 2) <= threshold - return SparseWorkspace(gap(prob), max_actions) +) where {N, M, R, MR <: AbstractMatrix{R}} + prob = sys.transition[1].ambiguity_sets + if Threads.nthreads() == 1 || num_states(sys) <= threshold + return 
DenseIntervalWorkspace(prob, num_actions(sys)) else - return ThreadedSparseWorkspace(gap(prob), max_actions) - end -end - -## Orthogonal -abstract type SimpleOrthogonalWorkspace end - -# Dense -struct DenseOrthogonalWorkspace{N, M, T <: Real} <: SimpleOrthogonalWorkspace - expectation_cache::NTuple{N, Vector{T}} - first_level_perm::Array{Int32, M} - permutation::Vector{Int32} - scratch::Vector{Int32} - actions::Vector{T} -end - -function DenseOrthogonalWorkspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R}}, - max_actions, -) where {N, R} - pns = num_target(p) - nmax = maximum(pns) - - first_level_perm = Array{Int32}(undef, pns) - perm = Vector{Int32}(undef, nmax) - scratch = Vector{Int32}(undef, nmax) - expectation_cache = NTuple{N - 1, Vector{R}}(Vector{R}(undef, n) for n in pns[2:end]) - actions = Vector{R}(undef, max_actions) - return DenseOrthogonalWorkspace( - expectation_cache, - first_level_perm, - perm, - scratch, - actions, - ) -end -permutation(ws::DenseOrthogonalWorkspace) = ws.permutation -scratch(ws::DenseOrthogonalWorkspace) = ws.scratch -first_level_perm(ws::DenseOrthogonalWorkspace) = ws.first_level_perm -actions(ws::DenseOrthogonalWorkspace) = ws.actions - -struct ThreadedDenseOrthogonalWorkspace{N, M, T} - thread_workspaces::Vector{DenseOrthogonalWorkspace{N, M, T}} -end - -function ThreadedDenseOrthogonalWorkspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R}}, - max_actions, -) where {N, R} - nthreads = Threads.nthreads() - pns = num_target(p) - nmax = maximum(pns) - - first_level_perm = Array{Int32}(undef, pns) - - workspaces = map(1:nthreads) do _ - perm = Vector{Int32}(undef, nmax) - scratch = Vector{Int32}(undef, nmax) - expectation_cache = - NTuple{N - 1, Vector{R}}(Vector{R}(undef, n) for n in pns[2:end]) - actions = Vector{R}(undef, max_actions) - return DenseOrthogonalWorkspace( - expectation_cache, - first_level_perm, - perm, - scratch, - actions, - ) - end - - return 
ThreadedDenseOrthogonalWorkspace(workspaces) -end - -Base.getindex(ws::ThreadedDenseOrthogonalWorkspace, i) = ws.thread_workspaces[i] - -""" - construct_workspace(prob::OrthogonalIntervalProbabilities) - -Construct a workspace for computing the Bellman update, given a value function. -If the Bellman update is used in a hot-loop, it is more efficient to use this function -to preallocate the workspace and reuse across iterations. - -The workspace type is determined by the type and size of the transition probability matrix, -as well as the number of threads available. -""" -function construct_workspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R, VR, MR}}, - max_actions = 1, -) where {N, R, VR, MR <: AbstractMatrix{R}} - if Threads.nthreads() == 1 - return DenseOrthogonalWorkspace(p, max_actions) - else - return ThreadedDenseOrthogonalWorkspace(p, max_actions) + return ThreadedDenseIntervalWorkspace(prob, num_actions(sys)) end end # Sparse -struct SparseOrthogonalWorkspace{N, T <: Real} <: SimpleOrthogonalWorkspace - expectation_cache::NTuple{N, Vector{T}} - values_gaps::Vector{Tuple{T, T}} +struct SparseIntervalWorkspace{T <: Real} + budget::Vector{T} scratch::Vector{Tuple{T, T}} + values_gaps::Vector{Tuple{T, T}} actions::Vector{T} end -scratch(ws::SparseOrthogonalWorkspace) = ws.scratch -actions(ws::SparseOrthogonalWorkspace) = ws.actions -function SparseOrthogonalWorkspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R, VR, MR}}, - max_actions, -) where {N, R, VR, MR <: AbstractSparseMatrix{R}} - max_nonzeros_per_prob = [maximum(map(nnz, eachcol(gap(pᵢ)))) for pᵢ in p] - max_nonzeros = maximum(max_nonzeros_per_prob) +function SparseIntervalWorkspace(ambiguity_sets::IntervalAmbiguitySets{R}, nactions) where {R <: Real} + max_support = maximum(nnz, ambiguity_sets) - scratch = Vector{Tuple{R, R}}(undef, max_nonzeros) - values_gaps = Vector{Tuple{R, R}}(undef, max_nonzeros) - expectation_cache = - NTuple{N - 1, 
Vector{R}}(Vector{R}(undef, n) for n in max_nonzeros_per_prob[2:end]) - actions = Vector{R}(undef, max_actions) - - return SparseOrthogonalWorkspace(expectation_cache, values_gaps, scratch, actions) + budget = 1 .- vec(sum(ambiguity_sets.lower; dims = 1)) + scratch = Vector{Tuple{R, R}}(undef, max_support) + values_gaps = Vector{Tuple{R, R}}(undef, max_support) + actions = Vector{R}(undef, nactions) + return SparseIntervalWorkspace(budget, scratch, values_gaps, actions) end -struct ThreadedSparseOrthogonalWorkspace{N, T} - thread_workspaces::Vector{SparseOrthogonalWorkspace{N, T}} +scratch(ws::SparseIntervalWorkspace) = ws.scratch + +struct ThreadedSparseIntervalWorkspace{T} + thread_workspaces::Vector{SparseIntervalWorkspace{T}} end -function ThreadedSparseOrthogonalWorkspace(p::OrthogonalIntervalProbabilities, max_actions) +function ThreadedSparseIntervalWorkspace(ambiguity_sets::IntervalAmbiguitySets, nactions) nthreads = Threads.nthreads() - thread_workspaces = [SparseOrthogonalWorkspace(p, max_actions) for _ in 1:nthreads] - - return ThreadedSparseOrthogonalWorkspace(thread_workspaces) + thread_workspaces = [SparseIntervalWorkspace(ambiguity_sets, nactions) for _ in 1:nthreads] + return ThreadedSparseIntervalWorkspace(thread_workspaces) end -Base.getindex(ws::ThreadedSparseOrthogonalWorkspace, i) = ws.thread_workspaces[i] - -""" - construct_workspace(prob::OrthogonalIntervalProbabilities) - -Construct a workspace for computing the Bellman update, given a value function. -If the Bellman update is used in a hot-loop, it is more efficient to use this function -to preallocate the workspace and reuse across iterations. +Base.getindex(ws::ThreadedSparseIntervalWorkspace, i) = ws.thread_workspaces[i] -The workspace type is determined by the type and size of the transition probability matrix, -as well as the number of threads available. 
-""" function construct_workspace( - p::OrthogonalIntervalProbabilities{N, <:IntervalAmbiguitySet{R, VR, MR}}, - max_actions = 1, -) where {N, R, VR, MR <: AbstractSparseMatrix{R}} - if Threads.nthreads() == 1 - return SparseOrthogonalWorkspace(p, max_actions) + prob::IntervalAmbiguitySets{R, MR}; + threshold = 10, +) where {R, MR <: AbstractSparseMatrix{R}} + if Threads.nthreads() == 1 || num_sets(prob) <= threshold + return SparseIntervalWorkspace(prob, 1) else - return ThreadedSparseOrthogonalWorkspace(p, max_actions) + return ThreadedSparseIntervalWorkspace(prob, 1) end end -## Mixture - -# Sequential -struct MixtureWorkspace{W <: SimpleOrthogonalWorkspace, R} - orthogonal_workspace::W - mixture_cache::Vector{R} - scratch::Vector{Int32} - permutation::Vector{Int32} -end -permutation(ws::MixtureWorkspace) = ws.permutation -scratch(ws::MixtureWorkspace) = ws.scratch -actions(ws::MixtureWorkspace) = actions(ws.orthogonal_workspace) - -function MixtureWorkspace( - p::MixtureIntervalProbabilities{ - N, - <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, - }, - max_actions, -) where {N, M, R, VR, MR <: AbstractMatrix{R}} - mixture_cache = Vector{R}(undef, N) - scratch = Vector{Int32}(undef, N) - permutation = Vector{Int32}(undef, N) - return MixtureWorkspace( - DenseOrthogonalWorkspace(first(p), max_actions), - mixture_cache, - scratch, - permutation, - ) -end - -function MixtureWorkspace( - p::MixtureIntervalProbabilities{ - N, - <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, - }, - max_actions, -) where {N, M, R, VR, MR <: AbstractSparseMatrix{R}} - mixture_cache = Vector{R}(undef, N) - mixture_scratch = Vector{Int32}(undef, N) - mixture_permutation = Vector{Int32}(undef, N) - - max_nonzeros_per_prob = map(1:M) do l - return maximum(1:N) do k - return maximum(map(nnz, eachcol(gap(p[k], l)))) - end - end - max_nonzeros = maximum(max_nonzeros_per_prob) - - scratch = Vector{Tuple{R, R}}(undef, max_nonzeros) - 
values_gaps = Vector{Tuple{R, R}}(undef, max_nonzeros) - expectation_cache = - NTuple{M - 1, Vector{R}}(Vector{R}(undef, n) for n in max_nonzeros_per_prob[2:end]) - actions = Vector{R}(undef, max_actions) - - return MixtureWorkspace( - SparseOrthogonalWorkspace(expectation_cache, values_gaps, scratch, actions), - mixture_cache, - mixture_scratch, - mixture_permutation, - ) -end - -# Threaded -struct ThreadedMixtureWorkspace{W <: SimpleOrthogonalWorkspace, V <: MixtureWorkspace{W}} - thread_workspaces::Vector{V} -end - -function ThreadedMixtureWorkspace( - p::MixtureIntervalProbabilities{ - N, - <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, - }, - max_actions, -) where {N, M, R, VR, MR <: AbstractMatrix{R}} - nthreads = Threads.nthreads() - pns = num_target(p) - nmax = maximum(pns) - - first_level_perm = Array{Int32}(undef, pns) - - workspaces = map(1:nthreads) do _ - mixture_cache = Vector{R}(undef, N) - mixture_scratch = Vector{Int32}(undef, N) - mixture_permutation = Vector{Int32}(undef, N) - - perm = Vector{Int32}(undef, nmax) - scratch = Vector{Int32}(undef, nmax) - expectation_cache = - NTuple{M - 1, Vector{R}}(Vector{R}(undef, n) for n in pns[2:end]) - actions = Vector{R}(undef, max_actions) - return MixtureWorkspace( - DenseOrthogonalWorkspace( - expectation_cache, - first_level_perm, - perm, - scratch, - actions, - ), - mixture_cache, - mixture_scratch, - mixture_permutation, - ) - end - - return ThreadedMixtureWorkspace(workspaces) -end - -function ThreadedMixtureWorkspace( - p::MixtureIntervalProbabilities{ - N, - <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, - }, - max_actions, -) where {N, M, R, VR, MR <: AbstractSparseMatrix{R}} - nthreads = Threads.nthreads() - - max_nonzeros_per_prob = map(1:M) do l - return maximum(1:N) do k - return maximum(map(nnz, eachcol(gap(p[k], l)))) - end - end - max_nonzeros = maximum(max_nonzeros_per_prob) - - workspaces = map(1:nthreads) do _ - mixture_cache = 
Vector{R}(undef, N) - mixture_scratch = Vector{Int32}(undef, N) - mixture_permutation = Vector{Int32}(undef, N) - - scratch = Vector{Tuple{R, R}}(undef, max_nonzeros) - values_gaps = Vector{Tuple{R, R}}(undef, max_nonzeros) - expectation_cache = NTuple{M - 1, Vector{R}}( - Vector{R}(undef, n) for n in max_nonzeros_per_prob[2:end] - ) - actions = Vector{R}(undef, max_actions) - return MixtureWorkspace( - SparseOrthogonalWorkspace(expectation_cache, values_gaps, scratch, actions), - mixture_cache, - mixture_scratch, - mixture_permutation, - ) - end - - return ThreadedMixtureWorkspace(workspaces) -end - -Base.getindex(ws::ThreadedMixtureWorkspace, i) = ws.thread_workspaces[i] - -""" - construct_workspace(prob::MixtureIntervalProbabilities) - -Construct a workspace for computing the Bellman update, given a value function. -If the Bellman update is used in a hot-loop, it is more efficient to use this function -to preallocate the workspace and reuse across iterations. - -The workspace type is determined by the type and size of the transition probability matrix, -as well as the number of threads available. 
-""" function construct_workspace( - p::MixtureIntervalProbabilities{ - N, - <:OrthogonalIntervalProbabilities{M, <:IntervalAmbiguitySet{R, VR, MR}}, - }, - max_actions = 1, -) where {N, M, R, VR, MR <: Union{AbstractMatrix{R}, AbstractSparseMatrix{R}}} - if Threads.nthreads() == 1 - return MixtureWorkspace(p, max_actions) + sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}; + threshold = 10, +) where {N, M, R, MR <: AbstractSparseMatrix{R}} + prob = sys.transition[1].ambiguity_sets + if Threads.nthreads() == 1 || num_states(sys) <= threshold + return SparseIntervalWorkspace(prob, num_actions(sys)) else - return ThreadedMixtureWorkspace(p, max_actions) + return ThreadedSparseIntervalWorkspace(prob, num_actions(sys)) end end diff --git a/test/base/base.jl b/test/base/base.jl index 8c27e3f8..878998b2 100644 --- a/test/base/base.jl +++ b/test/base/base.jl @@ -2,14 +2,14 @@ test_files = [ "bellman.jl", "vi.jl", - "imdp.jl", - "synthesis.jl", - "specification.jl", - "orthogonal.jl", - "mixture.jl", - "labelling.jl", - "dfa.jl", - "product.jl", + # "imdp.jl", + # "synthesis.jl", + # "specification.jl", + # "orthogonal.jl", + # "mixture.jl", + # "labelling.jl", + # "dfa.jl", + # "product.jl", ] for f in test_files @testset "base/$f" include(f) diff --git a/test/base/bellman.jl b/test/base/bellman.jl index 5d315a90..de72e517 100644 --- a/test/base/bellman.jl +++ b/test/base/bellman.jl @@ -3,7 +3,7 @@ using IntervalMDP for N in [Float32, Float64, Rational{BigInt}] @testset "N = $N" begin - prob = IntervalProbabilities(; + prob = IntervalAmbiguitySets(; lower = N[0 1//2; 1//10 3//10; 2//10 1//10], upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], ) @@ -20,13 +20,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - ws = IntervalMDP.DenseWorkspace(gap(prob), 1) + ws = 
IntervalMDP.DenseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -34,13 +33,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) @test Vres ≈ N[27 // 10, 17 // 10] - ws = IntervalMDP.ThreadedDenseWorkspace(gap(prob), 1) + ws = IntervalMDP.ThreadedDenseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -48,8 +46,7 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) @test Vres ≈ N[27 // 10, 17 // 10] @@ -65,13 +62,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] - ws = IntervalMDP.DenseWorkspace(gap(prob), 1) + ws = IntervalMDP.DenseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -79,13 +75,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) @test Vres ≈ N[17 // 10, 15 // 10] - ws = IntervalMDP.ThreadedDenseWorkspace(gap(prob), 1) + ws = IntervalMDP.ThreadedDenseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -93,8 +88,7 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) @test Vres ≈ N[17 // 10, 15 // 10] diff --git a/test/base/vi.jl b/test/base/vi.jl index ed91afec..41974f04 100644 --- a/test/base/vi.jl +++ b/test/base/vi.jl @@ -1,7 +1,7 @@ using Revise, Test using IntervalMDP -prob = IntervalProbabilities(; 
+prob = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.0 0.1 0.3 0.0 diff --git a/test/runtests.jl b/test/runtests.jl index 67a5b9b5..b081978f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,6 @@ using Test @testset verbose = true "IntervalMDP.jl" begin @testset verbose = true "base" include("base/base.jl") @testset verbose = true "sparse" include("sparse/sparse.jl") - @testset verbose = true "data" include("data/data.jl") - @testset verbose = true "cuda" include("cuda/cuda.jl") + # @testset verbose = true "data" include("data/data.jl") + # @testset verbose = true "cuda" include("cuda/cuda.jl") end diff --git a/test/sparse/bellman.jl b/test/sparse/bellman.jl index d2627b35..5c3a2e19 100644 --- a/test/sparse/bellman.jl +++ b/test/sparse/bellman.jl @@ -3,7 +3,7 @@ using IntervalMDP, SparseArrays for N in [Float32, Float64, Rational{BigInt}] @testset "N = $N" begin - prob = IntervalProbabilities(; + prob = IntervalAmbiguitySets(; lower = sparse_hcat( SparseVector(15, [4, 10], N[1 // 10, 2 // 10]), SparseVector(15, [5, 6, 7], N[5 // 10, 3 // 10, 1 // 10]), @@ -26,13 +26,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - ws = IntervalMDP.SparseWorkspace(gap(prob), 1) + ws = IntervalMDP.SparseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -40,13 +39,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) @test Vres ≈ N[82 // 10, 57 // 10] - ws = IntervalMDP.ThreadedSparseWorkspace(gap(prob), 1) + ws = IntervalMDP.ThreadedSparseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -54,8 +52,7 @@ for N in [Float32, Float64, 
Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) @test Vres ≈ N[82 // 10, 57 // 10] @@ -71,13 +68,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] - ws = IntervalMDP.SparseWorkspace(gap(prob), 1) + ws = IntervalMDP.SparseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -85,13 +81,12 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) @test Vres ≈ N[37 // 10, 55 // 10] - ws = IntervalMDP.ThreadedSparseWorkspace(gap(prob), 1) + ws = IntervalMDP.ThreadedSparseIntervalWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -99,8 +94,7 @@ for N in [Float32, Float64, Rational{BigInt}] strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) @test Vres ≈ N[37 // 10, 55 // 10] diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index 814fe782..32a41459 100644 --- a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -1,5 +1,12 @@ -test_files = ["bellman.jl", "vi.jl", "imdp.jl", "synthesis.jl", "orthogonal.jl"] +test_files = [ + "bellman.jl", + "vi.jl", + # "imdp.jl", + # "synthesis.jl", + # "orthogonal.jl" +] + for f in test_files @testset "sparse/$f" include(f) end diff --git a/test/sparse/vi.jl b/test/sparse/vi.jl index fb3cd376..2182895d 100644 --- a/test/sparse/vi.jl +++ b/test/sparse/vi.jl @@ -1,7 +1,7 @@ using Revise, Test using IntervalMDP, SparseArrays -prob = IntervalProbabilities(; +prob = IntervalAmbiguitySets(; lower = sparse_hcat( SparseVector(3, [2, 3], [0.1, 0.2]), SparseVector(3, [1, 2, 3], [0.5, 0.3, 0.1]), From b23c3897daef05e7c33a78bcce72e73a8f05d348 
Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sun, 31 Aug 2025 22:01:19 +0200 Subject: [PATCH 03/71] Remove unused sub2ind --- src/models/FactoredRobustMarkovDecisionProcess.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index cead9d20..b8badbac 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -116,7 +116,6 @@ initial_states(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.initial_states source_shape(m::FactoredRobustMarkovDecisionProcess) = m.source_dims action_shape(m::FactoredRobustMarkovDecisionProcess) = m.action_vars -sub2ind(rmdp::FactoredRMDP, r, jₛ, jₐ) = sub2ind(rmdp.transition[r], jₛ, jₐ) -function Base.getindex(rmdp::FactoredRMDP, r::Int, jₛ, jₐ) - return rmdp.transition[r][jₛ, jₐ] +function Base.getindex(rmdp::FactoredRMDP, r) + return rmdp.transition[r] end \ No newline at end of file From 1af647b75307ead9b7c92d3f817f2b650034129a Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 1 Sep 2025 18:31:18 +0200 Subject: [PATCH 04/71] Add IMDP constructor, fix strategies, fix indexing order, reenable and fix most tests --- ext/IntervalMDPCudaExt.jl | 2 +- ext/cuda/interval_probabilities.jl | 2 - src/bellman.jl | 12 ++-- src/models/IntervalMarkovChain.jl | 26 +++++--- src/models/IntervalMarkovDecisionProcess.jl | 70 +++++++++++++++++---- src/models/ProductProcess.jl | 20 +++--- src/models/models.jl | 4 +- src/probabilities/IntervalAmbiguitySets.jl | 4 +- src/probabilities/Marginal.jl | 10 +-- src/robust_value_iteration.jl | 5 +- src/strategy.jl | 24 ++++--- src/strategy_cache.jl | 52 +++++++-------- src/utils.jl | 2 +- test/base/base.jl | 12 ++-- test/base/imdp.jl | 29 +++++---- test/base/product.jl | 34 +++------- test/base/specification.jl | 12 ++-- test/base/synthesis.jl | 63 ++++++------------- test/sparse/imdp.jl | 24 
+++---- test/sparse/sparse.jl | 4 +- test/sparse/synthesis.jl | 57 ++++++++++++----- 21 files changed, 258 insertions(+), 210 deletions(-) diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl index f0633083..9af2d4d3 100644 --- a/ext/IntervalMDPCudaExt.jl +++ b/ext/IntervalMDPCudaExt.jl @@ -121,7 +121,7 @@ IntervalMDP.arrayfactory( ::MR, T, num_states, -) where {R, MR <: Union{CuSparseMatrixCSC{R}, CuArray{R}}} = CUDA.zeros(T, num_states) +) where {R, MR <: Union{CuSparseMatrixCSC{R}, CuArray{R}}} = CuArray{T}(undef, num_states) include("cuda/utils.jl") include("cuda/array.jl") diff --git a/ext/cuda/interval_probabilities.jl b/ext/cuda/interval_probabilities.jl index c38496af..a1c4898a 100644 --- a/ext/cuda/interval_probabilities.jl +++ b/ext/cuda/interval_probabilities.jl @@ -3,8 +3,6 @@ function IntervalMDP.compute_gap( lower::M, upper::M, ) where {Tv, Ti, M <: CuSparseMatrixCSC{Tv, Ti}} - # lower = CuSparseMatrixCOO(lower) - # FIXME: This is an ugly, non-robust hack. upper = SparseMatrixCSC(upper) lower = SparseMatrixCSC(lower) diff --git a/src/bellman.jl b/src/bellman.jl index 72a78b67..58dda842 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -330,12 +330,12 @@ function state_bellman!( ) @inbounds begin for jₐ in CartesianIndices(action_shape(marginal)) - ambiguity_set = marginal[jₛ, jₐ] - budget = workspace.budget[sub2ind(marginal, jₛ, jₐ)] + ambiguity_set = marginal[jₐ, jₛ] + budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] workspace.actions[jₐ] = state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end - Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, action_shape(marginal), maximize) end end @@ -350,9 +350,9 @@ function state_bellman!( maximize, ) @inbounds begin - jₐ = strategy_cache[jₛ] - ambiguity_set = marginal[jₛ, jₐ] - budget = workspace.budget[sub2ind(marginal, jₛ, jₐ)] + jₐ = 
CartesianIndex(strategy_cache[jₛ]) + ambiguity_set = marginal[jₐ, jₛ] + budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end end diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl index 3706a27a..e338b01b 100644 --- a/src/models/IntervalMarkovChain.jl +++ b/src/models/IntervalMarkovChain.jl @@ -1,11 +1,12 @@ -function IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets{R, MR}, initial_states=AllStates()) where {R, MR <: AbstractMatrix{R}} - state_vars = (num_target(ambiguity_set),) +function IntervalMarkovChain(marginal::Marginal{<:IntervalAmbiguitySets}, initial_states=AllStates()) + state_vars = (Int32(num_target(marginal)),) + source_dims = source_shape(marginal) - state_indices = (1,) - action_indices = (1,) - source_dims = (num_sets(ambiguity_set),) - action_vars = (1,) - marginal = Marginal(ambiguity_set, state_indices, action_indices, source_dims, action_vars) + if action_shape(marginal) != (1,) + throw(DimensionMismatch("The action shape of the marginal must be (1,) for an IntervalMarkovChain. 
Got $(action_shape(marginal)).")) + end + + action_vars = (Int32(1),) return FactoredRMDP( # wrap in a FactoredRMDP for consistency state_vars, @@ -14,4 +15,15 @@ function IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets{R, MR}, initia (marginal,), initial_states, ) +end + + +function IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) + state_indices = (1,) + action_indices = (1,) + source_dims = (num_sets(ambiguity_set),) + action_vars = (1,) + marginal = Marginal(ambiguity_set, state_indices, action_indices, source_dims, action_vars) + + return IntervalMarkovChain(marginal, initial_states) end \ No newline at end of file diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index f636e1c9..bdb49860 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -1,17 +1,65 @@ +function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySets}, initial_states::InitialStates = AllStates()) + state_vars = (Int32(num_target(marginal)),) + action_vars = action_shape(marginal) + source_dims = source_shape(marginal) + transition = (marginal,) -function IntervalMarkovDecisionProcess(states::Vector{S}, actions::Vector{A}, transition_intervals::Dict{Tuple{S,A,S}, Tuple{Float64, Float64}}, rewards::Dict{Tuple{S,A}, Float64}) where {S,A} - # Validate inputs - for ((s, a, s_next), (p_min, p_max)) in transition_intervals - @assert 0.0 <= p_min <= p_max <= 1.0 "Transition probabilities must be in [0, 1] and p_min <= p_max" + return FactoredRMDP( + state_vars, + action_vars, + source_dims, + transition, + initial_states + ) +end + +function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Int, initial_states::InitialStates = AllStates()) + state_indices = (1,) + action_indices = (1,) + + if num_sets(ambiguity_set) % num_actions != 0 + throw(ArgumentError("The number of sets in the ambiguity set 
must be a multiple of the number of actions.")) end - for s in states - for a in actions - total_min = sum(p_min for ((s2, a2, s3), (p_min, p_max)) in transition_intervals if s2 == s && a2 == a) - total_max = sum(p_max for ((s2, a2, s3), (p_min, p_max)) in transition_intervals if s2 == s && a2 == a) - @assert total_min <= 1.0 "Total minimum transition probability from state $s with action $a exceeds 1" - @assert total_max <= 1.0 "Total maximum transition probability from state $s with action $a exceeds 1" + + source_dims = (num_sets(ambiguity_set) ÷ num_actions,) + action_vars = (num_actions,) + marginal = Marginal(ambiguity_set, state_indices, action_indices, source_dims, action_vars) + + return IntervalMarkovDecisionProcess(marginal, initial_states) +end + +function IntervalMarkovDecisionProcess( + ps::Vector{<:IntervalAmbiguitySets}, + initial_states::InitialStates = AllStates(), +) + marginal = interval_prob_hcat(ps) + return IntervalMarkovDecisionProcess(marginal, initial_states) +end + +function interval_prob_hcat( + ps::Vector{<:IntervalAmbiguitySets{R, MR}}, +) where {R, MR <: AbstractMatrix{R}} + if length(ps) == 0 + throw(ArgumentError("Cannot concatenate an empty vector of IntervalAmbiguitySets.")) + end + + num_actions = num_sets(ps[1]) + for (i, p) in enumerate(ps) + if num_sets(p) != num_actions + throw(DimensionMismatch("All IntervalAmbiguitySets must have the same number of sets (actions). 
Expected $num_actions, was $(num_sets(p)) at index $i.")) end end - return IntervalMarkovDecisionProcess{S,A}(states, actions, transition_intervals, rewards) + l = mapreduce(p -> p.lower, hcat, ps) + g = mapreduce(p -> p.gap, hcat, ps) + + ambiguity_set = IntervalAmbiguitySets(l, g) + + stateindices = (1,) + actionindices = (1,) + source_dims = (length(ps),) + action_vars = (num_actions,) + marginal = Marginal(ambiguity_set, stateindices, actionindices, source_dims, action_vars) + + return marginal end \ No newline at end of file diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index 2fc77693..83602125 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -20,7 +20,7 @@ is a set of ambiguity sets on the product transition probabilities, for each pro See [`IntervalMarkovDecisionProcess`](@ref) and [`DFA`](@ref) for more information on the structure, definition, and usage of the DFA and IMDP. ### Fields -- `imdp::M`: contains details for the interval Markov process. +- `mdp::M`: contains details for the interval Markov process. 
- `dfa::D`: contains details for the DFA - `labelling_func::L`: the labelling function from IMDP states to DFA actions """ @@ -29,12 +29,12 @@ struct ProductProcess{ D <: DeterministicAutomaton, L <: AbstractLabelling, } <: StochasticProcess - imdp::M + mdp::M dfa::D labelling_func::L function ProductProcess( - imdp::M, + mdp::M, dfa::D, labelling_func::L, ) where { @@ -42,23 +42,23 @@ struct ProductProcess{ D <: DeterministicAutomaton, L <: AbstractLabelling, } - checkproduct(imdp, dfa, labelling_func) + checkproduct(mdp, dfa, labelling_func) - return new{M, D, L}(imdp, dfa, labelling_func) + return new{M, D, L}(mdp, dfa, labelling_func) end end function checkproduct( - imdp::IntervalMarkovProcess, + mdp::FactoredRMDP, dfa::DeterministicAutomaton, labelling_func::AbstractLabelling, ) - # check labelling states (input) match IMDP states - if size(labelling_func) != source_shape(imdp) + # check labelling states (input) match MDP states + if size(labelling_func) != state_variables(mdp) throw( DimensionMismatch( - "The number of IMDP states $(source_shape(imdp)) is not equal to number of mapped states $(size(labelling_func)) in the labelling function.", + "The mapped states $(size(labelling_func)) in the labelling function is not equal the fRMDP state variables $(state_variables(mdp)).", ), ) end @@ -78,7 +78,7 @@ end Return the interval markov decision process of the product """ -markov_process(proc::ProductProcess) = proc.imdp +markov_process(proc::ProductProcess) = proc.mdp """ automaton(proc::ProductIntervalMarkovDecisionProcessDFA) diff --git a/src/models/models.jl b/src/models/models.jl index 8fd80ff5..7139da3d 100644 --- a/src/models/models.jl +++ b/src/models/models.jl @@ -7,8 +7,10 @@ export num_states, num_actions, initial_states include("FactoredRobustMarkovDecisionProcess.jl") export FactoredRobustMarkovDecisionProcess, state_variables, action_variables +# Convenience model constructors - they all return a FactoredRobustMarkovDecisionProcess 
include("IntervalMarkovChain.jl") -export IntervalMarkovChain +include("IntervalMarkovDecisionProcess.jl") +export IntervalMarkovChain, IntervalMarkovDecisionProcess include("DeterministicAutomaton.jl") diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 588b9be3..c1336af7 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -130,8 +130,8 @@ function Base.getindex(p::IntervalAmbiguitySets, j) return IntervalAmbiguitySet(l, g) end -sub2ind(::IntervalAmbiguitySets, jₛ, jₐ) = jₛ -function Base.getindex(p::IntervalAmbiguitySets, jₛ, jₐ) +sub2ind(::IntervalAmbiguitySets, jₐ, jₛ) = jₛ +function Base.getindex(p::IntervalAmbiguitySets, jₐ, jₛ) # Select by columns only! l = @view p.lower[:, jₛ] g = @view p.gap[:, jₛ] diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 6a3af190..2c09534b 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -17,7 +17,7 @@ struct SARectangularMarginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndice ) where {A <: AbstractAmbiguitySets, N, M} checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars) - linear_index = LinearIndices((source_dims..., action_vars...)) + linear_index = LinearIndices((action_vars..., source_dims...)) return new{A, N, M, typeof(linear_index)}(ambiguity_sets, state_indices, action_indices, source_dims, action_vars, linear_index) end end @@ -61,11 +61,11 @@ function Base.getindex(p::Marginal, source, action) return ambiguity_sets(p)[sub2ind(p, source, action)] end -sub2ind(p::Marginal, source::CartesianIndex, action::CartesianIndex) = sub2ind(p, Tuple(source), Tuple(action)) -function sub2ind(p::Marginal, source::NTuple{N, <:Integer}, action::NTuple{M, <:Integer}) where {N, M} - source = getindex.(Tuple(source), p.state_indices) +sub2ind(p::Marginal, action::CartesianIndex, source::CartesianIndex) = sub2ind(p, Tuple(action), 
Tuple(source)) +function sub2ind(p::Marginal, action::NTuple{M, <:Integer}, source::NTuple{N, <:Integer}) where {N, M} action = getindex.(Tuple(action), p.action_indices) - j = p.linear_index[source..., action...] + source = getindex.(Tuple(source), p.state_indices) + j = p.linear_index[action..., source...] return j end \ No newline at end of file diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index c38e332b..f4346d64 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -162,15 +162,16 @@ end function ValueFunction(problem::AbstractIntervalMDPProblem) mp = system(problem) previous = arrayfactory(mp, valuetype(mp), state_variables(mp)) + previous .= zero(valuetype(mp)) current = copy(previous) return ValueFunction(previous, current) end -function lastdiff!(V) +function lastdiff!(V::ValueFunction{R}) where {R} # Reuse prev to store the latest difference V.previous .-= V.current - rmul!(V.previous, -1.0) + rmul!(V.previous, -one(R)) return V.previous end diff --git a/src/strategy.jl b/src/strategy.jl index c779ec18..4880a193 100644 --- a/src/strategy.jl +++ b/src/strategy.jl @@ -8,7 +8,7 @@ checkstrategy(::NoStrategy, system) = nothing A stationary strategy is a strategy that is the same for all time steps. 
""" -struct StationaryStrategy{A <: AbstractArray{Int32}} <: AbstractStrategy +struct StationaryStrategy{N, A <: AbstractArray{NTuple{N, Int32}}} <: AbstractStrategy strategy::A end Base.getindex(strategy::StationaryStrategy, k) = strategy.strategy @@ -27,16 +27,24 @@ function checkstrategy(strategy::AbstractArray, system::ProductProcess) end end -function checkstrategy(strategy::AbstractArray, system::IntervalMarkovProcess) - num_actions = stateptr(system)[2:end] .- stateptr(system)[1:(end - 1)] - ranges = (1:n for n in source_shape(system)) - if !all(1 .<= vec(strategy[ranges...]) .<= num_actions) +function checkstrategy(strategy::AbstractArray, system::FactoredRMDP) + if size(strategy) != source_shape(system) throw( - DomainError( - "The strategy includes at least one invalid action (less than 1 or greater than num_actions for the state).", + DimensionMismatch( + "The strategy shape $(size(strategy)) does not match the source shape of the system $(source_shape(system)).", ), ) end + + for jₛ in CartesianIndices(source_shape(system)) + if !all(1 .<= strategy[jₛ] .<= action_shape(system)) + throw( + DomainError( + "The strategy includes at least one invalid action (less than 1 or greater than num_actions for the state).", + ), + ) + end + end end """ @@ -45,7 +53,7 @@ end A time-varying strategy is a strategy that _may_ vary over time. Since we need to store the strategy for each time step, the strategy is finite, and thus only applies to finite time specifications, of the same length as the strategy. 
""" -struct TimeVaryingStrategy{A <: AbstractArray{Int32}} <: AbstractStrategy +struct TimeVaryingStrategy{N, A <: AbstractArray{NTuple{N, Int32}}} <: AbstractStrategy strategy::Vector{A} end Base.getindex(strategy::TimeVaryingStrategy, k) = strategy.strategy[k] diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index 5585f12b..8997af06 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -25,7 +25,7 @@ end construct_strategy_cache(::VerificationProblem{S, F, <:NoStrategy}) where {S, F} = NoStrategyCache() -function extract_strategy!(::NoStrategyCache, values, V, j, maximize) +function extract_strategy!(::NoStrategyCache, values, V, j, action_shape, maximize) return maximize ? maximum(values) : minimum(values) end step_postprocess_strategy_cache!(::NoStrategyCache) = nothing @@ -39,7 +39,7 @@ construct_strategy_cache(problem::VerificationProblem{S, F, C}) where {S, F, C} GivenStrategyCache(strategy(problem)) time_length(cache::GivenStrategyCache) = time_length(cache.strategy) -struct ActiveGivenStrategyCache{A <: AbstractArray{Int32}} <: NonOptimizingStrategyCache +struct ActiveGivenStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: NonOptimizingStrategyCache strategy::A end Base.getindex(cache::GivenStrategyCache, k) = ActiveGivenStrategyCache(cache.strategy[k]) @@ -53,12 +53,12 @@ construct_strategy_cache(problem::ControlSynthesisProblem) = construct_strategy_ ) # Strategy cache for storing time-varying policies -struct TimeVaryingStrategyCache{A <: AbstractArray{Int32}} <: OptimizingStrategyCache +struct TimeVaryingStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: OptimizingStrategyCache cur_strategy::A strategy::Vector{A} end -function TimeVaryingStrategyCache(cur_strategy::A) where {A} +function TimeVaryingStrategyCache(cur_strategy::A) where {N, A <: AbstractArray{NTuple{N, Int32}}} return TimeVaryingStrategyCache(cur_strategy, Vector{A}()) end @@ -66,37 +66,32 @@ function 
construct_strategy_cache(problem::ControlSynthesisProblem, time_varying mp = system(problem) N = length(action_variables(mp)) cur_strategy = arrayfactory(mp, NTuple{N, Int32}, source_shape(mp)) + cur_strategy .= (ntuple(_ -> 0, N),) return TimeVaryingStrategyCache(cur_strategy) end -function replacezerobyone!(array) - array[array .== 0] .= 1 - return array -end - -cachetostrategy(strategy_cache::TimeVaryingStrategyCache) = TimeVaryingStrategy([ - replacezerobyone!(indices) for indices in reverse(strategy_cache.strategy) -]) +cachetostrategy(strategy_cache::TimeVaryingStrategyCache) = TimeVaryingStrategy(collect(reverse(strategy_cache.strategy))) function extract_strategy!( strategy_cache::TimeVaryingStrategyCache, values::AbstractArray{R}, V, - j, + jₛ, + action_shape, maximize, ) where {R <: Real} opt_val = maximize ? typemin(R) : typemax(R) - opt_index = 1 + opt_index = ntuple(_ -> 1, length(action_shape)) neutral = (opt_val, opt_index) - return _extract_strategy!(strategy_cache.cur_strategy, values, neutral, j, maximize) + return _extract_strategy!(strategy_cache.cur_strategy, values, neutral, jₛ, action_shape, maximize) end function step_postprocess_strategy_cache!(strategy_cache::TimeVaryingStrategyCache) push!(strategy_cache.strategy, copy(strategy_cache.cur_strategy)) end # Strategy cache for storing stationary policies -struct StationaryStrategyCache{A <: AbstractArray{Int32}} <: OptimizingStrategyCache +struct StationaryStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: OptimizingStrategyCache strategy::A end @@ -106,44 +101,45 @@ function construct_strategy_cache( ) mp = system(problem) N = length(action_variables(mp)) - cur_strategy = arrayfactory(mp, NTuple{N, Int32}, source_shape(mp)) + strategy = arrayfactory(mp, NTuple{N, Int32}, source_shape(mp)) + strategy .= (ntuple(_ -> 0, N),) return StationaryStrategyCache(strategy) end -cachetostrategy(strategy_cache::StationaryStrategyCache) = - 
StationaryStrategy(replacezerobyone!(strategy_cache.strategy)) +cachetostrategy(strategy_cache::StationaryStrategyCache) = StationaryStrategy(strategy_cache.strategy) function extract_strategy!( strategy_cache::StationaryStrategyCache, values::AbstractArray{R}, V, - j, + jₛ, + action_shape, maximize, ) where {R <: Real} - neutral = if iszero(strategy_cache.strategy[j]) + neutral = if all(iszero.(strategy_cache.strategy[jₛ])) maximize ? typemin(R) : typemax(R), 1 else - V[j], strategy_cache.strategy[j] + V[jₛ], strategy_cache.strategy[jₛ] end - return _extract_strategy!(strategy_cache.strategy, values, neutral, j, maximize) + return _extract_strategy!(strategy_cache.strategy, values, neutral, jₛ, action_shape, maximize) end step_postprocess_strategy_cache!(::StationaryStrategyCache) = nothing # Shared between stationary and time-varying strategies -function _extract_strategy!(cur_strategy, values, neutral, j, maximize) +function _extract_strategy!(cur_strategy, values, neutral, jₛ, action_shape, maximize) gt = maximize ? 
(>) : (<) opt_val, opt_index = neutral - # TODO: update to accept state variables - for (i, v) in enumerate(values) + for jₐ in CartesianIndices(action_shape) + v = values[jₐ] if gt(v, opt_val) opt_val = v - opt_index = i + opt_index = Tuple(jₐ) end end - @inbounds cur_strategy[j] = opt_index + @inbounds cur_strategy[jₛ] = opt_index return opt_val end diff --git a/src/utils.jl b/src/utils.jl index ed1cedab..d2bc3d3a 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -6,7 +6,7 @@ arrayfactory(marginal::Marginal, T, num_states) = arrayfactory(marginal.ambiguity_sets, T, num_states) arrayfactory(prob::IntervalAmbiguitySets, T, num_states) = arrayfactory(prob.gap, T, num_states) -arrayfactory(::MR, T, num_states) where {MR <: AbstractArray} = zeros(T, num_states) +arrayfactory(::MR, T, num_states) where {MR <: AbstractArray} = Array{T}(undef, num_states) valuetype(mp::ProductProcess) = valuetype(markov_process(mp)) valuetype(mp::FactoredRMDP) = valuetype(mp.transition[1]) diff --git a/test/base/base.jl b/test/base/base.jl index 878998b2..b02ed5f5 100644 --- a/test/base/base.jl +++ b/test/base/base.jl @@ -2,14 +2,14 @@ test_files = [ "bellman.jl", "vi.jl", - # "imdp.jl", - # "synthesis.jl", - # "specification.jl", + "imdp.jl", + "synthesis.jl", + "specification.jl", # "orthogonal.jl", # "mixture.jl", - # "labelling.jl", - # "dfa.jl", - # "product.jl", + "labelling.jl", + "dfa.jl", + "product.jl", ] for f in test_files @testset "base/$f" include(f) diff --git a/test/base/imdp.jl b/test/base/imdp.jl index 51b122cd..c39a7463 100644 --- a/test/base/imdp.jl +++ b/test/base/imdp.jl @@ -1,7 +1,7 @@ using Revise, Test using IntervalMDP -prob1 = IntervalProbabilities(; +prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.1 0.3 @@ -14,7 +14,7 @@ prob1 = IntervalProbabilities(; ], ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = [ 0.1 0.2 0.2 0.3 @@ -27,18 +27,21 @@ prob2 = IntervalProbabilities(; ], ) -prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 
- 1.0 -][:, :], upper = [ - 0.0 - 0.0 - 1.0 -][:, :]) +prob3 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ], + upper = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ] +) transition_probs = [prob1, prob2, prob3] -istates = [Int32(1)] +istates = [1] mdp = IntervalMarkovDecisionProcess(transition_probs, istates) @test initial_states(mdp) == istates @@ -56,7 +59,7 @@ mdp = IntervalMarkovDecisionProcess(transition_probs) end @testset "explicit sink state" begin - transition_prob, _ = IntervalMDP.interval_prob_hcat(transition_probs) + transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) # Finite time reachability diff --git a/test/base/product.jl b/test/base/product.jl index 5a4ec267..f8c23c0e 100644 --- a/test/base/product.jl +++ b/test/base/product.jl @@ -17,7 +17,7 @@ using IntervalMDP dfa = DFA(delta, istate, atomic_props) # imdp - prob1 = IntervalProbabilities(; + prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.1 0.3 @@ -30,7 +30,7 @@ using IntervalMDP ], ) - prob2 = IntervalProbabilities(; + prob2 = IntervalAmbiguitySets(; lower = [ 0.1 0.2 0.2 0.3 @@ -43,17 +43,7 @@ using IntervalMDP ], ) - prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 - 1.0 - ][:, :], upper = [ - 0.0 - 0.0 - 1.0 - ][:, :]) - - transition_probs = [prob1, prob2, prob3] + transition_probs = [prob1, prob2] istates = [Int32(1)] mdp = IntervalMarkovDecisionProcess(transition_probs, istates) @@ -103,7 +93,7 @@ end @testset "bellman" begin for N in [Float32, Float64, Rational{BigInt}] @testset "N = $N" begin - prob = IntervalProbabilities(; + prob = IntervalAmbiguitySets(; lower = N[ 0 5//10 0 1//10 3//10 0 @@ -150,7 +140,7 @@ end @testset "value iteration" begin for N in [Float32, Float64, Rational{BigInt}] @testset "N = $N" begin - prob1 = IntervalProbabilities(; + prob1 = IntervalAmbiguitySets(; lower = N[ 0//10 5//10 1//10 3//10 @@ -163,7 +153,7 @@ end ], ) - prob2 = IntervalProbabilities(; + 
prob2 = IntervalAmbiguitySets(; lower = N[ 1//10 2//10 2//10 3//10 @@ -176,17 +166,7 @@ end ], ) - prob3 = IntervalProbabilities(; lower = N[ - 0 - 0 - 1 - ][:, :], upper = N[ - 0 - 0 - 1 - ][:, :]) - - transition_probs = [prob1, prob2, prob3] + transition_probs = [prob1, prob2] mdp = IntervalMarkovDecisionProcess(transition_probs) # Product model - just simple reachability diff --git a/test/base/specification.jl b/test/base/specification.jl index 8b3a9d00..cebbec5a 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -149,7 +149,7 @@ end # Errors # ########## @testset "errors" begin - prob = IntervalProbabilities(; + prob = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.0 0.1 0.3 0.0 @@ -162,7 +162,7 @@ end ], ) mc = IntervalMarkovChain(prob) - tv_strat = TimeVaryingStrategy([Int32[1, 1, 1]]) + tv_strat = TimeVaryingStrategy([Tuple{Int32}[(1,), (1,)]]) # Product model - just simple reachability delta = TransitionFunction(Int32[ @@ -176,10 +176,10 @@ end labelling = LabellingFunction(Int32[1, 1, 2]) prod_proc = ProductProcess(mc, dfa, labelling) - tv_prod_strat = TimeVaryingStrategy([Int32[ - 1 1 - 1 1 - 1 1 + tv_prod_strat = TimeVaryingStrategy([Tuple{Int32}[ + (1,) (1,) + (1,) (1,) + (1,) (1,) ]]) # Time horizon must be a positive integer diff --git a/test/base/synthesis.jl b/test/base/synthesis.jl index bc5f4220..c105ea7f 100644 --- a/test/base/synthesis.jl +++ b/test/base/synthesis.jl @@ -1,7 +1,7 @@ using Revise, Test using IntervalMDP -prob1 = IntervalProbabilities(; +prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.1 0.3 @@ -14,7 +14,7 @@ prob1 = IntervalProbabilities(; ], ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = [ 0.1 0.2 0.2 0.3 @@ -27,15 +27,18 @@ prob2 = IntervalProbabilities(; ], ) -prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 - 1.0 -][:, :], upper = [ - 0.0 - 0.0 - 1.0 -][:, :]) +prob3 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ], + upper = [ + 0.0 0.0 + 0.0 
0.0 + 1.0 1.0 + ] +) transition_probs = [prob1, prob2, prob3] istates = [Int32(1)] @@ -52,7 +55,7 @@ policy, V, k, res = sol @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test policy[k] == [1, 2, 1] + @test policy[k] == [(1,), (2,), (1,)] end @test strategy(sol) == policy @@ -74,7 +77,7 @@ policy, V, k, res = solve(problem) @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test policy[k] == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP @@ -89,7 +92,7 @@ problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) @test policy isa StationaryStrategy -@test policy[1] == [1, 2, 1] +@test policy[1] == [(1,), (2,), (1,)] # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) @@ -108,39 +111,13 @@ policy, V, k, res = solve(problem) @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:(time_length(policy) - 1) - @test policy[k] == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # The last time step (aka. the first value iteration step) has a different strategy. 
-@test policy[time_length(policy)] == [2, 1, 1] +@test policy[time_length(policy)] == [(2,), (1,), (1,)] @testset "implicit sink state" begin - prob1 = IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], - ) - - prob2 = IntervalProbabilities(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], - ) - transition_probs = [prob1, prob2] mdp = IntervalMarkovDecisionProcess(transition_probs) @@ -153,6 +130,6 @@ end @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test policy[k] == [1, 2, 1] + @test policy[k] == [(1,), (2,)] end end diff --git a/test/sparse/imdp.jl b/test/sparse/imdp.jl index 9fe50380..9e10fa92 100644 --- a/test/sparse/imdp.jl +++ b/test/sparse/imdp.jl @@ -1,7 +1,7 @@ using Revise, Test using IntervalMDP -prob1 = IntervalProbabilities(; +prob1 = IntervalAmbiguitySets(; lower = sparse([ 0.0 0.5 0.1 0.3 @@ -14,7 +14,7 @@ prob1 = IntervalProbabilities(; ]), ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = sparse([ 0.1 0.2 0.2 0.3 @@ -27,17 +27,17 @@ prob2 = IntervalProbabilities(; ]), ) -prob3 = IntervalProbabilities(; +prob3 = IntervalAmbiguitySets(; lower = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ]), upper = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ]), ) transition_probs = [prob1, prob2, prob3] @@ -49,7 +49,7 @@ mdp = IntervalMarkovDecisionProcess(transition_probs, istates) mdp = IntervalMarkovDecisionProcess(transition_probs) @testset "explicit sink state" begin - transition_prob, _ = IntervalMDP.interval_prob_hcat(transition_probs) + transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) # Finite time reachability diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index 32a41459..da4c6b6c 100644 --- 
a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -2,8 +2,8 @@ test_files = [ "bellman.jl", "vi.jl", - # "imdp.jl", - # "synthesis.jl", + "imdp.jl", + "synthesis.jl", # "orthogonal.jl" ] diff --git a/test/sparse/synthesis.jl b/test/sparse/synthesis.jl index e1584880..a3a7fc1b 100644 --- a/test/sparse/synthesis.jl +++ b/test/sparse/synthesis.jl @@ -1,7 +1,7 @@ using Revise, Test using IntervalMDP, SparseArrays -prob1 = IntervalProbabilities(; +prob1 = IntervalAmbiguitySets(; lower = sparse([ 0.0 0.5 0.1 0.3 @@ -14,7 +14,7 @@ prob1 = IntervalProbabilities(; ]), ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = sparse([ 0.1 0.2 0.2 0.3 @@ -27,17 +27,17 @@ prob2 = IntervalProbabilities(; ]), ) -prob3 = IntervalProbabilities(; +prob3 = IntervalAmbiguitySets(; lower = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ]), upper = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ]) ) transition_probs = [prob1, prob2, prob3] @@ -49,14 +49,20 @@ mdp = IntervalMarkovDecisionProcess(transition_probs, istates) prop = FiniteTimeReachability([3], 10) spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) -policy, V, k, res = solve(problem) +sol = solve(problem) +policy, V, k, res = sol @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test policy[k] == [1, 2, 1] + @test policy[k] == [(1,), (2,), (1,)] end +@test strategy(sol) == policy +@test value_function(sol) == V +@test num_iterations(sol) == k +@test residual(sol) == res + # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = solve(problem) @@ -71,7 +77,7 @@ policy, V, k, res = solve(problem) @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test policy[k] == [2, 2, 1] + @test 
policy[k] == [(2,), (2,), (1,)] end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP @@ -86,7 +92,7 @@ problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) @test policy isa StationaryStrategy -@test policy[1] == [1, 2, 1] +@test policy[1] == [(1,), (2,), (1,)] # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) @@ -105,8 +111,25 @@ policy, V, k, res = solve(problem) @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:(time_length(policy) - 1) - @test policy[k] == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # The last time step (aka. the first value iteration step) has a different strategy. -@test policy[time_length(policy)] == [2, 1, 1] +@test policy[time_length(policy)] == [(2,), (1,), (1,)] + +@testset "implicit sink state" begin + transition_probs = [prob1, prob2] + mdp = IntervalMarkovDecisionProcess(transition_probs) + + # Finite time reachability + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(mdp, spec) + policy, V, k, res = solve(problem) + + @test policy isa TimeVaryingStrategy + @test time_length(policy) == 10 + for k in 1:time_length(policy) + @test policy[k] == [(1,), (2,)] + end +end From 482d14bcb1214df3d528e16512cdbcb6eb5bc55e Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 3 Sep 2025 17:02:49 +0200 Subject: [PATCH 05/71] Fix Data module after model restructurings --- src/Data/bmdp-tool.jl | 132 +++++++------ src/Data/intervalmdp.jl | 37 ++-- src/Data/prism.jl | 174 +++++++++--------- src/IntervalMDP.jl | 2 +- .../FactoredRobustMarkovDecisionProcess.jl | 2 + src/models/IntervalMarkovDecisionProcess.jl | 2 +- src/models/models.jl | 2 +- src/probabilities/IntervalAmbiguitySets.jl | 4 +- 
src/probabilities/Marginal.jl | 8 +- src/probabilities/probabilities.jl | 3 + test/data/bmdp_tool.jl | 83 +++++---- test/data/intervalmdp.jl | 21 ++- test/data/multiObj_robotIMDP.nc | Bin 33961 -> 24430 bytes test/data/prism.jl | 54 ++++-- test/runtests.jl | 2 +- 15 files changed, 299 insertions(+), 227 deletions(-) diff --git a/src/Data/bmdp-tool.jl b/src/Data/bmdp-tool.jl index 3a3e622e..87d3b138 100644 --- a/src/Data/bmdp-tool.jl +++ b/src/Data/bmdp-tool.jl @@ -39,24 +39,18 @@ function read_bmdp_tool_file(path) end open(path, "r") do io - number_states = read_intline(readline(io)) - number_actions = read_intline(readline(io)) - number_terminal = read_intline(readline(io)) + num_states = read_intline(readline(io)) + num_actions = read_intline(readline(io)) + num_terminal = read_intline(readline(io)) - terminal_states = map(1:number_terminal) do _ + terminal_states = map(1:num_terminal) do _ return CartesianIndex(read_intline(readline(io)) + Int32(1)) end - probs = Vector{ - IntervalAmbiguitySet{ - Float64, - Vector{Float64}, - SparseArrays.FixedSparseCSC{Float64, Int32}, - }, - }( - undef, - number_states, - ) + num_choices = num_states * num_actions + + probs_lower = Vector{SparseVector{Float64, Int32}}(undef, num_choices) + probs_upper = Vector{SparseVector{Float64, Int32}}(undef, num_choices) lines_it = eachline(io) next = iterate(lines_it) @@ -68,20 +62,23 @@ function read_bmdp_tool_file(path) cur_line, state = next src, act, dest, lower, upper = read_bmdp_tool_transition_line(cur_line) - for j in 0:(number_states - 1) - probs_lower = spzeros(Float64, Int32, number_states, number_actions) - probs_upper = spzeros(Float64, Int32, number_states, number_actions) + for jₛ in 1:num_states + for jₐ in 1:num_actions + state_action_probs_lower = spzeros(Float64, Int32, num_states) + state_action_probs_upper = spzeros(Float64, Int32, num_states) - actions_to_remove = Int64[] + if src != jₛ - 1 + throw(ArgumentError("Transitions file is not sorted by source index or the 
number of actions was less than expected. Expected source index $(jₛ - 1), got $src.")) + end - for k in 0:(number_actions - 1) - if src != j || act != k - push!(actions_to_remove, k + 1) + if act != jₐ - 1 + throw(ArgumentError("Transitions file is not sorted by action index or the number of actions was less than expected. Expected action index $(jₐ - 1), got $act.")) end - while src == j && act == k - probs_lower[dest + 1, k + 1] = lower - probs_upper[dest + 1, k + 1] = upper + while src == jₛ - 1 && act == jₐ - 1 + # PRISM uses 0-based indexing + state_action_probs_lower[dest + 1] = lower + state_action_probs_upper[dest + 1] = upper next = iterate(lines_it, state) if isnothing(next) @@ -91,20 +88,46 @@ function read_bmdp_tool_file(path) cur_line, state = next src, act, dest, lower, upper = read_bmdp_tool_transition_line(cur_line) end - end - actions_to_keep = setdiff(collect(1:number_actions), actions_to_remove) - probs_lower = probs_lower[:, actions_to_keep] - probs_upper = probs_upper[:, actions_to_keep] + j = (jₛ - 1) * num_actions + jₐ + probs_lower[j] = state_action_probs_lower + probs_upper[j] = state_action_probs_upper + end + end - probs[j + 1] = IntervalAmbiguitySet(; lower = probs_lower, upper = probs_upper) + # Colptr is the same for both lower and upper + num_col = mapreduce(x -> size(x, 2), +, probs_lower) + colptr = zeros(Int32, num_col + 1) + nnz_sofar = 0 + @inbounds for i in eachindex(probs_lower) + colptr[i] = nnz_sofar + 1 + nnz_sofar += nnz(probs_lower[i]) end + colptr[end] = nnz_sofar + 1 + + probs_lower_rowval = mapreduce(lower -> lower.nzind, vcat, probs_lower) + probs_lower_nzval = mapreduce(lower -> lower.nzval, vcat, probs_lower) + probs_lower = SparseMatrixCSC( + num_states, + num_col, + colptr, + probs_lower_rowval, + probs_lower_nzval, + ) - action_list_per_state = collect(0:(number_actions - 1)) - action_list = - convert.(Int32, mapreduce(_ -> action_list_per_state, vcat, 1:number_states)) + probs_upper_rowval = mapreduce(upper -> 
upper.nzind, vcat, probs_upper) + probs_upper_nzval = mapreduce(upper -> upper.nzval, vcat, probs_upper) + probs_upper = SparseMatrixCSC( + num_states, + num_col, + colptr, + probs_upper_rowval, + probs_upper_nzval, + ) + + probs = IntervalAmbiguitySets(; lower = probs_lower, upper = probs_upper) - mdp = IntervalMarkovDecisionProcess(probs, action_list) + mdp = IntervalMarkovDecisionProcess(probs, num_actions) return mdp, terminal_states end end @@ -146,20 +169,17 @@ write_bmdp_tool_file( ) where {T} = write_bmdp_tool_file(path, mdp, CartesianIndex.(terminal_states)) """ - write_bmdp_tool_file(path, mdp::IntervalMarkovDecisionProcess, terminal_states::Vector{<:CartesianIndex}) + write_bmdp_tool_file(path, mdp::IMDP, terminal_states::Vector{<:CartesianIndex}) """ function write_bmdp_tool_file( path, - mdp::IntervalMarkovDecisionProcess, + mdp::IntervalMDP.IMDP{M}, terminal_states::Vector{<:CartesianIndex}, -) - prob = transition_prob(mdp) - l, g = lower(prob), gap(prob) - num_columns = num_source(prob) - sptr = IntervalMDP.stateptr(mdp) +) where {M} + marginal = marginals(mdp)[1] number_states = num_states(mdp) - number_actions = IntervalMDP.max_actions(mdp) + number_actions = IntervalMDP.num_actions(mdp) number_terminal = length(terminal_states) open(path, "w") do io @@ -171,28 +191,20 @@ function write_bmdp_tool_file( println(io, terminal_state[1] - 1) end - s = 1 - action = 0 - for j in 1:num_columns - if sptr[s + 1] == j - s += 1 - action = 0 - end - src = s - 1 - - column_lower = @view l[:, j] - I, V = SparseArrays.findnz(column_lower) + for jₛ in CartesianIndices(source_shape(marginal)) + src = jₛ[1] - 1 + for jₐ in CartesianIndices(action_shape(marginal)) + act = jₐ[1] - 1 + ambiguity_set = marginal[jₐ, jₛ] - for (i, v) in zip(I, V) - dest = i - 1 - pl = v - pu = pl + g[i, j] + for i in support(ambiguity_set) + dest = i - 1 # bmdp-tool uses 0-based indexing + pl = lower(ambiguity_set, i) + pu = upper(ambiguity_set, i) - transition = "$src $action $dest $pl 
$pu" - println(io, transition) + println(io, "$src $act $dest $pl $pu") + end end - - action += 1 end end end diff --git a/src/Data/intervalmdp.jl b/src/Data/intervalmdp.jl index 8f89c824..aa65c8a1 100644 --- a/src/Data/intervalmdp.jl +++ b/src/Data/intervalmdp.jl @@ -60,10 +60,14 @@ function read_intervalmdp_jl_model(model_path) upper_nzval, ) - prob = IntervalAmbiguitySet(; lower = P̲, upper = P̅) + prob = IntervalAmbiguitySets(; lower=P̲, upper=P̅) stateptr = convert.(Int32, dataset["stateptr"][:]) + num_actions = diff(stateptr) + if any(num_actions .!= num_actions[1]) + throw(DimensionMismatch("All states must have the same number of actions.")) + end - return IntervalMarkovDecisionProcess(prob, stateptr, initial_states) + return IntervalMarkovDecisionProcess(prob, num_actions[1], initial_states) end return mdp @@ -169,7 +173,7 @@ Write an `IntervalMarkovDecisionProcess` to an IntervalMDP.jl system file (netCD See [Data storage formats](@ref) for more information on the file format. 
""" -function write_intervalmdp_jl_model(model_path, mdp::IntervalMarkovDecisionProcess) +function write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.IMDP{M}; deflate_level = 5) where {M} Dataset(model_path, "c") do dataset dataset.attrib["model"] = "imdp" dataset.attrib["format"] = "sparse_csc" @@ -182,19 +186,20 @@ function write_intervalmdp_jl_model(model_path, mdp::IntervalMarkovDecisionProce istates = Int32[] end defDim(dataset, "initial_states", length(istates)) - v = defVar(dataset, "initial_states", Int32, ("initial_states",); deflatelevel = 5) + v = defVar(dataset, "initial_states", Int32, ("initial_states",); deflatelevel = deflate_level) v[:] = istates - prob = transition_prob(mdp) - l = lower(prob) - g = gap(prob) + marginal = marginals(mdp)[1] + ambiguity_sets = marginal.ambiguity_sets + l = ambiguity_sets.lower + g = ambiguity_sets.gap defDim(dataset, "lower_colptr", length(l.colptr)) - v = defVar(dataset, "lower_colptr", Int32, ("lower_colptr",); deflatelevel = 5) + v = defVar(dataset, "lower_colptr", Int32, ("lower_colptr",); deflatelevel = deflate_level) v[:] = l.colptr defDim(dataset, "lower_rowval", length(l.rowval)) - v = defVar(dataset, "lower_rowval", Int32, ("lower_rowval",); deflatelevel = 5) + v = defVar(dataset, "lower_rowval", Int32, ("lower_rowval",); deflatelevel = deflate_level) v[:] = l.rowval defDim(dataset, "lower_nzval", length(l.nzval)) @@ -203,16 +208,16 @@ function write_intervalmdp_jl_model(model_path, mdp::IntervalMarkovDecisionProce "lower_nzval", eltype(l.nzval), ("lower_nzval",); - deflatelevel = 5, + deflatelevel = deflate_level, ) v[:] = l.nzval defDim(dataset, "upper_colptr", length(g.colptr)) - v = defVar(dataset, "upper_colptr", Int32, ("upper_colptr",); deflatelevel = 5) + v = defVar(dataset, "upper_colptr", Int32, ("upper_colptr",); deflatelevel = deflate_level) v[:] = g.colptr defDim(dataset, "upper_rowval", length(g.rowval)) - v = defVar(dataset, "upper_rowval", Int32, ("upper_rowval",); deflatelevel = 5) + v 
= defVar(dataset, "upper_rowval", Int32, ("upper_rowval",); deflatelevel = deflate_level) v[:] = g.rowval defDim(dataset, "upper_nzval", length(g.nzval)) @@ -221,13 +226,13 @@ function write_intervalmdp_jl_model(model_path, mdp::IntervalMarkovDecisionProce "upper_nzval", eltype(g.nzval), ("upper_nzval",); - deflatelevel = 5, + deflatelevel = deflate_level, ) v[:] = l.nzval + g.nzval - - defDim(dataset, "stateptr", length(stateptr(mdp))) + + defDim(dataset, "stateptr", source_shape(marginal)[1] + 1) v = defVar(dataset, "stateptr", Int32, ("stateptr",)) - v[:] = stateptr(mdp) + v[:] = [[Int32(1)]; (1:num_states(mdp)) .* Int32(num_actions(mdp)) .+ 1] return nothing end diff --git a/src/Data/prism.jl b/src/Data/prism.jl index 6d9cd417..fbdd8dce 100644 --- a/src/Data/prism.jl +++ b/src/Data/prism.jl @@ -15,13 +15,13 @@ See [Data storage formats](@ref) for more information on the file format. write_prism_file(path_without_file_ending, problem) = write_prism_file(path_without_file_ending, system(problem), specification(problem)) -write_prism_file(path_without_file_ending, mdp_or_mc, spec) = write_prism_file( +write_prism_file(path_without_file_ending, mdp, spec) = write_prism_file( path_without_file_ending * ".sta", path_without_file_ending * ".tra", path_without_file_ending * ".lab", path_without_file_ending * ".srew", path_without_file_ending * ".pctl", - mdp_or_mc, + mdp, spec, ) @@ -45,16 +45,16 @@ function write_prism_file( lab_path, srew_path, pctl_path, - mdp_or_mc, + mdp::IntervalMDP.IMDP{M}, spec, -) - write_prism_states_file(sta_path, mdp_or_mc) - write_prism_transitions_file(tra_path, mdp_or_mc) - write_prism_spec(lab_path, srew_path, pctl_path, mdp_or_mc, spec) +) where {M} + write_prism_states_file(sta_path, mdp) + write_prism_transitions_file(tra_path, mdp) + write_prism_spec(lab_path, srew_path, pctl_path, mdp, spec) end -function write_prism_states_file(sta_path, mdp_or_mc) - number_states = num_states(mdp_or_mc) +function write_prism_states_file(sta_path, 
mdp::IntervalMDP.IMDP{M}) where {M} + number_states = num_states(mdp) open(sta_path, "w") do io println(io, "(s)") @@ -66,59 +66,46 @@ function write_prism_states_file(sta_path, mdp_or_mc) end end -function write_prism_transitions_file(tra_path, mdp::IntervalMarkovDecisionProcess) - number_states = num_states(mdp) - - prob = transition_prob(mdp) - l, g = lower(prob), gap(prob) +function write_prism_transitions_file(tra_path, mdp::IntervalMDP.IMDP{M}; lb_threshold = 1e-12) where {M} + marginal = marginals(mdp)[1] - num_columns = num_source(prob) - num_transitions = nnz(l) - - sptr = IntervalMDP.stateptr(mdp) - num_choices = num_columns + num_transitions = nnz(marginal.ambiguity_sets.lower) # Number of non-zero entries in the lower bound matrix + num_choices = source_shape(marginal)[1] * action_shape(marginal)[1] open(tra_path, "w") do io - println(io, "$number_states $num_choices $num_transitions") - - s = 1 - action_idx = 0 - for j in 1:num_columns - if sptr[s + 1] == j - s += 1 - action_idx = 0 - end - src = s - 1 + println(io, "$(num_states(mdp)) $num_choices $num_transitions") - column_lower = view(l, :, j) - I, V = SparseArrays.findnz(column_lower) + for jₛ in CartesianIndices(source_shape(marginal)) + src = jₛ[1] - 1 # PRISM uses 0-based indexing - for (i, v) in zip(I, V) - dest = i - 1 - pl = v - pu = pl + g[i, j] - pl = max(pl, 1e-12) + for jₐ in CartesianIndices(action_shape(marginal)) + act = jₐ[1] - 1 # PRISM uses 0-based indexing + ambiguity_set = marginal[jₐ, jₛ] - println(io, "$src $action_idx $dest [$pl,$pu] $j") - end + for i in support(ambiguity_set) + dest = i - 1 # PRISM uses 0-based indexing + pl = max(lower(ambiguity_set, i), lb_threshold) # PRISM requires constant support + pu = upper(ambiguity_set, i) - action_idx += 1 + println(io, "$src $act $dest [$pl,$pu]") + end + end end end end -function write_prism_spec(lab_path, srew_path, pctl_path, mdp_or_mc, spec) - write_prism_labels_file(lab_path, mdp_or_mc, system_property(spec)) - 
write_prism_rewards_file(srew_path, mdp_or_mc, system_property(spec)) +function write_prism_spec(lab_path, srew_path, pctl_path, mdp, spec) + write_prism_labels_file(lab_path, mdp, system_property(spec)) + write_prism_rewards_file(srew_path, mdp, system_property(spec)) write_prism_props_file(pctl_path, spec) end function write_prism_labels_file( lab_path, - mdp_or_mc, + mdp, prop::IntervalMDP.AbstractReachability, ) - istates = initial_states(mdp_or_mc) + istates = initial_states(mdp) target_states = reach(prop) open(lab_path, "w") do io @@ -136,8 +123,8 @@ function write_prism_labels_file( end end -function write_prism_labels_file(lab_path, mdp_or_mc, prop::IntervalMDP.AbstractReachAvoid) - istates = initial_states(mdp_or_mc) +function write_prism_labels_file(lab_path, mdp, prop::IntervalMDP.AbstractReachAvoid) + istates = initial_states(mdp) target_states = reach(prop) avoid_states = avoid(prop) @@ -161,8 +148,8 @@ function write_prism_labels_file(lab_path, mdp_or_mc, prop::IntervalMDP.Abstract end end -function write_prism_labels_file(lab_path, mdp_or_mc, prop::IntervalMDP.AbstractReward) - istates = initial_states(mdp_or_mc) +function write_prism_labels_file(lab_path, mdp, prop::IntervalMDP.AbstractReward) + istates = initial_states(mdp) open(lab_path, "w") do io println(io, "0=\"init\" 1=\"deadlock\"") @@ -176,18 +163,18 @@ end function write_prism_rewards_file( lab_path, - mdp_or_mc, + mdp, prop::IntervalMDP.AbstractReachability, ) # Do nothing - no rewards for reachability return nothing end -function write_prism_rewards_file(srew_path, mdp_or_mc, prop::IntervalMDP.AbstractReward) +function write_prism_rewards_file(srew_path, mdp, prop::IntervalMDP.AbstractReward) rew = reward(prop) open(srew_path, "w") do io - println(io, "$(num_states(mdp_or_mc)) $(length(rew))") + println(io, "$(num_states(mdp)) $(length(rew))") for (i, r) in enumerate(rew) s = i - 1 # PRISM uses 0-based indexing @@ -273,10 +260,10 @@ read_prism_file(sta_path, tra_path, lab_path, 
pctl_path) = function read_prism_file(sta_path, tra_path, lab_path, srew_path, pctl_path) num_states = read_prism_states_file(sta_path) - probs, stateptr = read_prism_transitions_file(tra_path, num_states) + probs, num_actions = read_prism_transitions_file(tra_path, num_states) initial_states, spec = read_prism_spec(lab_path, srew_path, pctl_path, num_states) - mdp = IntervalMarkovDecisionProcess(probs, stateptr, initial_states) + mdp = IntervalMarkovDecisionProcess(probs, num_actions, initial_states) return ControlSynthesisProblem(mdp, spec) end @@ -294,57 +281,68 @@ function read_prism_transitions_file(tra_path, num_states) num_states_t, num_choices, num_transitions = read_prism_transitions_file_header(readline(io)) - @assert num_states == num_states_t + if num_states != num_states_t + throw(DimensionMismatch("Number of states in .sta file ($num_states) does not match number of states in .tra file ($num_states_t).")) + end + + if num_choices <= 0 + throw(ArgumentError("Number of choices must be positive, was $num_choices.")) + end + + if num_transitions <= 0 + throw(ArgumentError("Number of transitions must be positive, was $num_transitions.")) + end + + if num_choices % num_states_t != 0 + throw(ArgumentError("Number of choices ($num_choices) must be a multiple of the number of states ($num_states_t).")) + end + num_actions = num_choices ÷ num_states_t + num_src_states = num_choices ÷ num_actions probs_lower = Vector{SparseVector{Float64, Int32}}(undef, num_choices) probs_upper = Vector{SparseVector{Float64, Int32}}(undef, num_choices) - stateptr = Vector{Int32}(undef, num_states + 1) - stateptr[1] = 1 - stateptr[end] = num_choices + 1 - lines_it = eachline(io) - next = iterate(lines_it) + next = iterate(lines_it) if isnothing(next) throw(ArgumentError("Transitions file is empty")) end cur_line, state = next - # We ignore the act field since we only use indices for actions/choices - src, act_idx, dest, lower, upper, act = read_prism_transition_line(cur_line) - - 
outer_src = src - - for j in 1:num_choices - state_action_probs_lower = spzeros(Float64, Int32, num_states) - state_action_probs_upper = spzeros(Float64, Int32, num_states) + src, act, dest, lower, upper, _ = read_prism_transition_line(cur_line) + + for jₛ in 1:num_src_states + for jₐ in 1:num_actions + state_action_probs_lower = spzeros(Float64, Int32, num_states) + state_action_probs_upper = spzeros(Float64, Int32, num_states) + + if src != jₛ - 1 + throw(ArgumentError("Transitions file is not sorted by source index or the number of actions was less than expected. Expected source index $(jₛ - 1), got $src.")) + end - cur_src = src - cur_act_idx = act_idx + if act != jₐ - 1 + throw(ArgumentError("Transitions file is not sorted by action index or the number of actions was less than expected. Expected action index $(jₐ - 1), got $act.")) + end - if src != outer_src - # PRISM uses 0-based indexing - stateptr[src + 1] = j - outer_src = src - end + while src == jₛ - 1 && act == jₐ - 1 + # PRISM uses 0-based indexing + state_action_probs_lower[dest + 1] = lower + state_action_probs_upper[dest + 1] = upper - while src == cur_src && act_idx == cur_act_idx - # PRISM uses 0-based indexing - state_action_probs_lower[dest + 1] = lower - state_action_probs_upper[dest + 1] = upper + next = iterate(lines_it, state) + if isnothing(next) + break + end - next = iterate(lines_it, state) - if isnothing(next) - break + cur_line, state = next + src, act, dest, lower, upper, _ = read_prism_transition_line(cur_line) end - cur_line, state = next - src, act_idx, dest, lower, upper, act = read_prism_transition_line(cur_line) + j = (jₛ - 1) * num_actions + jₐ + probs_lower[j] = state_action_probs_lower + probs_upper[j] = state_action_probs_upper end - - probs_lower[j] = state_action_probs_lower - probs_upper[j] = state_action_probs_upper end # Colptr is the same for both lower and upper @@ -377,9 +375,9 @@ function read_prism_transitions_file(tra_path, num_states) probs_upper_nzval, ) - 
probs = IntervalAmbiguitySet(; lower = probs_lower, upper = probs_upper) + probs = IntervalAmbiguitySets(; lower = probs_lower, upper = probs_upper) - return probs, stateptr + return probs, num_actions end end diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 648d02e1..1787d800 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -59,6 +59,6 @@ export RobustValueIteration include("robust_value_iteration.jl") ### Saving and loading models -# include("Data/Data.jl") +include("Data/Data.jl") end diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index b8badbac..1104bfa1 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -26,6 +26,8 @@ struct FactoredRobustMarkovDecisionProcess{ end end const FactoredRMDP = FactoredRobustMarkovDecisionProcess +const FactoredIMDP{N, M} = FactoredRMDP{N, M, P} where {P <: NTuple{N, <:Marginal{<:IntervalAmbiguitySets}}} +const IMDP{M} = FactoredIMDP{1, M} function FactoredRMDP( state_vars::NTuple{N, Int}, diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index bdb49860..77d23f60 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -13,7 +13,7 @@ function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySet ) end -function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Int, initial_states::InitialStates = AllStates()) +function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) state_indices = (1,) action_indices = (1,) diff --git a/src/models/models.jl b/src/models/models.jl index 7139da3d..43e0cb92 100644 --- a/src/models/models.jl +++ b/src/models/models.jl @@ -5,7 +5,7 @@ export IntervalMarkovProcess, AllStates export num_states, num_actions, 
initial_states include("FactoredRobustMarkovDecisionProcess.jl") -export FactoredRobustMarkovDecisionProcess, state_variables, action_variables +export FactoredRobustMarkovDecisionProcess, state_variables, action_variables, marginals # Convenience model constructors - they all return a FactoredRobustMarkovDecisionProcess include("IntervalMarkovChain.jl") diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index c1336af7..23a3a14f 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -38,7 +38,7 @@ sparse_prob = IntervalAmbiguitySets(; [1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. """ -struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} <: AbstractAmbiguitySets +struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} <: PolytopicAmbiguitySets lower::MR gap::MR @@ -149,7 +149,7 @@ function Base.iterate(p::IntervalAmbiguitySets, state) end Base.length(p::IntervalAmbiguitySets) = num_sets(p) -struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}} +struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}} <: PolytopicAmbiguitySet lower::VR gap::VR end diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 2c09534b..3f2ec1aa 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -25,10 +25,10 @@ const Marginal = SARectangularMarginal function Marginal( ambiguity_sets::A, - state_indices::NTuple{N, Int}, - action_indices::NTuple{M, Int}, - source_dims::NTuple{N, Int}, - action_vars::NTuple{M, Int}, + state_indices::NTuple{N, <:Integer}, + action_indices::NTuple{M, <:Integer}, + source_dims::NTuple{N, <:Integer}, + action_vars::NTuple{M, <:Integer}, ) where {A <: AbstractAmbiguitySets, N, M} state_indices_32 = 
Int32.(state_indices) action_indices_32 = Int32.(action_indices) diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index 369db123..3ccf41d0 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -1,5 +1,8 @@ abstract type AbstractAmbiguitySets end +abstract type PolytopicAmbiguitySets <: AbstractAmbiguitySets end + abstract type AbstractAmbiguitySet end +abstract type PolytopicAmbiguitySet <: AbstractAmbiguitySet end """ num_sets(ambiguity_sets::AbstractAmbiguitySets) diff --git a/test/data/bmdp_tool.jl b/test/data/bmdp_tool.jl index dd145bf7..063d341c 100644 --- a/test/data/bmdp_tool.jl +++ b/test/data/bmdp_tool.jl @@ -4,50 +4,67 @@ using IntervalMDP, IntervalMDP.Data, SparseArrays # Read MDP mdp, tstates = read_bmdp_tool_file("data/multiObj_robotIMDP.txt") -# Write it back -new_path = tempname() * ".txt" -write_bmdp_tool_file(new_path, mdp, tstates) +marginal = marginals(mdp)[1] +ambiguity_sets = marginal.ambiguity_sets -# Check the file is there -@test isfile(new_path) +@testset "write/read model,tstates" begin + # Write model + new_path = tempname() * ".txt" + write_bmdp_tool_file(new_path, mdp, tstates) -# Read new file and check that the models are the same -new_mdp, new_tstates = read_bmdp_tool_file(new_path) -rm(new_path) + # Check the file is there + @test isfile(new_path) -@test num_states(mdp) == num_states(new_mdp) + # Read new file and check that the models are the same + new_mdp, new_tstates = read_bmdp_tool_file(new_path) + rm(new_path) -transition_probabilities = transition_prob(mdp) -new_transition_probabilities = transition_prob(new_mdp) + @test num_states(mdp) == num_states(new_mdp) -@test size(transition_probabilities) == size(new_transition_probabilities) -@test lower(transition_probabilities) ≈ lower(new_transition_probabilities) -@test gap(transition_probabilities) ≈ gap(new_transition_probabilities) + new_marginal = marginals(new_mdp)[1] + new_ambiguity_sets = 
new_marginal.ambiguity_sets -@test tstates == new_tstates + @test source_shape(marginal) == source_shape(new_marginal) + @test action_shape(marginal) == action_shape(new_marginal) + @test num_target(marginal) == num_target(new_marginal) + @test state_variables(mdp) == state_variables(new_mdp) + @test action_variables(mdp) == action_variables(new_mdp) -# Write problem -tstates = [CartesianIndex(207)] -prop = FiniteTimeReachability(tstates, 10) -spec = Specification(prop, Pessimistic, Maximize) -problem = VerificationProblem(mdp, spec) + @test ambiguity_sets.lower ≈ new_ambiguity_sets.lower + @test ambiguity_sets.gap ≈ new_ambiguity_sets.gap -new_path = tempname() * ".txt" -write_bmdp_tool_file(new_path, problem) + @test tstates == new_tstates +end -# Check the file is there -@test isfile(new_path) +@testset "write/read problem" begin + # Write problem + tstates = [CartesianIndex(207)] + prop = FiniteTimeReachability(tstates, 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) -# Read new file and check that the models represent the same system -new_mdp, new_tstates = read_bmdp_tool_file(new_path) + new_path = tempname() * ".txt" + write_bmdp_tool_file(new_path, problem) -@test num_states(mdp) == num_states(new_mdp) + # Check the file is there + @test isfile(new_path) -transition_probabilities = transition_prob(mdp) -new_transition_probabilities = transition_prob(new_mdp) + # Read new file and check that the models represent the same system + new_mdp, new_tstates = read_bmdp_tool_file(new_path) -@test size(transition_probabilities) == size(new_transition_probabilities) -@test lower(transition_probabilities) ≈ lower(new_transition_probabilities) -@test gap(transition_probabilities) ≈ gap(new_transition_probabilities) + @test num_states(mdp) == num_states(new_mdp) -@test tstates == new_tstates + new_marginal = marginals(new_mdp)[1] + new_ambiguity_sets = new_marginal.ambiguity_sets + + @test source_shape(marginal) == 
source_shape(new_marginal) + @test action_shape(marginal) == action_shape(new_marginal) + @test num_target(marginal) == num_target(new_marginal) + @test state_variables(mdp) == state_variables(new_mdp) + @test action_variables(mdp) == action_variables(new_mdp) + + @test ambiguity_sets.lower ≈ new_ambiguity_sets.lower + @test ambiguity_sets.gap ≈ new_ambiguity_sets.gap + + @test tstates == new_tstates +end \ No newline at end of file diff --git a/test/data/intervalmdp.jl b/test/data/intervalmdp.jl index dc63055a..0f3e610d 100644 --- a/test/data/intervalmdp.jl +++ b/test/data/intervalmdp.jl @@ -1,6 +1,9 @@ using Revise, Test using IntervalMDP, IntervalMDP.Data, SparseArrays +mdp, tstates = read_bmdp_tool_file("data/multiObj_robotIMDP.txt") +write_intervalmdp_jl_model("data/multiObj_robotIMDP.nc", mdp) + @testset "io model" begin # Read MDP mdp = read_intervalmdp_jl_model("data/multiObj_robotIMDP.nc") @@ -15,12 +18,20 @@ using IntervalMDP, IntervalMDP.Data, SparseArrays @test num_states(mdp) == num_states(new_mdp) - transition_probabilities = transition_prob(mdp) - new_transition_probabilities = transition_prob(new_mdp) + marginal = marginals(mdp)[1] + new_marginal = marginals(new_mdp)[1] + + ambiguity_sets = marginal.ambiguity_sets + new_ambiguity_sets = new_marginal.ambiguity_sets + + @test source_shape(marginal) == source_shape(new_marginal) + @test action_shape(marginal) == action_shape(new_marginal) + @test num_target(marginal) == num_target(new_marginal) + @test state_variables(mdp) == state_variables(new_mdp) + @test action_variables(mdp) == action_variables(new_mdp) - @test size(transition_probabilities) == size(new_transition_probabilities) - @test lower(transition_probabilities) ≈ lower(new_transition_probabilities) - @test gap(transition_probabilities) ≈ gap(new_transition_probabilities) + @test ambiguity_sets.lower ≈ new_ambiguity_sets.lower + @test ambiguity_sets.gap ≈ new_ambiguity_sets.gap end @testset "io specification" begin diff --git 
a/test/data/multiObj_robotIMDP.nc b/test/data/multiObj_robotIMDP.nc index c7797bf3d7d66d05c3080c054b12224b6a139c6c..807fadabb908cd2c40b1dbffc0931cf288f6fb64 100644 GIT binary patch delta 1158 zcmZ8gTS!z<6g}tMJL5QW?>I9ijPFN25^WqQ%_sO6UxoNUF?-pdmfGN$&k&}MA}fe; z*pv{O1(qd|!a$&eSbYS7^%I0hlwU?#AEh9DG~IL0y;_}*yZ71WoVE5|d*3J5$@h6Y z9Ffx5gydys230ms->52g)DqE#Nt_(ybnFbgI5bU95T;v`iGKSD{UTJu3_@DfK}d*} ze3&!EXoN(Bu=i35Cf;ZHnDcH#1_%iV@rZ+qj`+l*nwP7L5@EcD0uYR466*Vv1-Uq1 zuWk?z*m4fOVLg5W+c=s3c=c{8B#_hu7f&A{RL2ZL2GvpQqY?jQcu<%LPe}{R8BURR z8VD0SWPyia<36MF_V{d7arqH#EWHnJ-J$6nN-nJA1`Pwcpx~iE)sgLwwOUSsuZK>t zc)x_h9vOnzrCRl_{HXgh$M!-ALK?7f+2#YOZ7pXq^3!eU2W^?5?u`zpX|zI2h?V$5 zd^sV%17Ox*fx|^+I9rfMI>W$h^x%_l%b1)rF%fl#X3iLqnL+P#MrDLg#=U}HWT$-# zI){?27TSX*MInhDeAXkRgPDRh15J@Z*1)hSQh%sY2$BVghIMziT`hrK`Dyu&RUVy9 zrTP&|&bBQ5VsSJynD3JnyCi)NxGZ|&2&VGaXOwte?G~{nPs3m1^$(nu4j96`(s3m8 zb#=9NH#NImUA^6FO8vivHeX1b$Dukn4`#$BGMF!E)+5j^vYfL*D070n)E>Rd{JfME zQq7Dc26Z;!d||w>63l|BZqKp=Jp!wo-IooZY)l|i zl~R1Cca6ka3lEDtL|X=4Q$*dOD8Alheq7GbE16N56$!ObG>m!mKQpkJI|uunt|@7X z=-QfI`l`t6gh@w*xTQc2(o@=0Xf3H|GHY&rksGN%Dzk_TF86>WqYSBbP8&|2Vm^Qx zY3X`;oxQD+%%f#2FeYA`r(imH061(r0b*$zKmC4S-M7l+^1ctrLp-!MT1ojaNgJZ` JU2(=~{s2|^^Ys7# delta 5658 zcmeHKX-rf{9H000?Ex$-tged40&;1ii&#ZWMUi7!6cu4n2^CnmB!H{1>|%w!)ItfZ z7Yw9$P}J7z19Gf25bIT~iM0ya8d;&Rv*Hc&Mq7q1X4>^@t6FG2O6)~)^Z5%sK)UmT82?#8? 
zy5grliJt(hWG5gGz=8-!fecUpWE%?$3?_h8IHJGmt4e~X?h7MGs3tsT00};fHJFT# zAC`BhjAzyfg6PBo8l3=^iXt7WM5C|q$~?G`bR^6$mYhyqkZL@jnVjLT;7)wR z9t6ON7r42K?kk2X-9n(9%v49}ma=C2AZNGr+$ni9G6GhJ=TDtIV|`}!+^}Fx@Epx- zR@-X}#!^ik>=aKTR>0tdiOLSnS3G7X_}Wb(gD>2ckunuLFDfJ5onbgNZOyuuhZQSE z(OVf;jyaL#^j;LJrj*w<84GW%7IttZ{$BEYCS%zKl^0ZrBWqkz$s;~6-KC6lQDD@- zJz){e|Becn$Rl-7qwtltIJ2-9%bWT~wYDjm$Y>2TxR#*^_PTmH?_XutVvTYy0I2Gn z-qU~@e}>(FqR)gjw;*#PN(M9zDex13%{#sKA(g}A_)z4NB6v#aNDeD8LoZpL`7f+b zro;zDM27{>06b>n1AW9A8P-KR*)DOc{9)s`1$2a77X`!^5PKtYT1OuK3?Q5Md>&b6 z;KhTm#e=Z62(8Q~tMVtm6q~KOP8SvNeOlqOo|_I7 zS=^MfYq%gi(!w|fp{=A7!jDLZAms~(wi zBX#8D&a&-ojdstzaH;!HdS&P@1{X`Y@Agf#tvk%yGCT)M0z z?$_pAPrhc}QPeP4I$Ffau@7>>4CEgt{ zTj~dvTRMKFP0!kIm1Vd0##tBiEmmyq_Eitp+tkm>1Ft@*JrdTR+_|~u!=U=X=%=Ue zWi^-F%u9Xt*}R!8T`4^yJ1nOM%e>ayi}ZTY9k_6KcjwHJilCWK>YB^?Q+kvC*mC9X zzL+ca%!)?)xp6 zk2Wze8su%9iMNS43#jf{K9^Jb5?UuQK>wpX37Lhl+6{B$AjfsPg zV)7|mKG2{DhP!m*m;O1Rm1+}o@u*+pYUTP6lEEu(AWt!acO6sHc!g{H>@F%a=pzYS zf{K20ow+4|H@Yo4jQlCXne$SnR3-Qv?gyw;3)tPUQgxV0D6P*`hbgjOkFuy-4re7y zB0n*3ndIVZ^_85XXl){@dB787;Q)I%@px#aPzIGC8vG8QS4=lWtNi@5=cQkSK28h7 zA+_BuK!vt^K+Dh`+*C2ZS)I z#%rCupp_A`w@UAU8b+*JG~*F)Kt$Ex{-phntv(V+pw5LbnTrse0UR!G45x0?r%8++ zF2d;AM6YV5)8bD#6fq;C2a7+dnk9(?%?#Emw$b7RmRH~jupaR;60{a|KfwP8zz6D+ z&mJIfS7(Mx?uJ}bt|33I$Yd~O6}fYa35usK&o^4K3ez%-`5R56cN9VKrf}PZMhnVv z{<0w`-ZUq8TIWRJ8v}Zd^PuY8kl?2Bpm62GKJDZW_*x_=-Xa;p1c4n!S%C+^j3P*ru>p?IA2wzYSV0 Date: Wed, 3 Sep 2025 19:04:44 +0200 Subject: [PATCH 06/71] Fix size checks --- src/models/IntervalMarkovChain.jl | 4 +- src/models/IntervalMarkovDecisionProcess.jl | 9 +-- src/probabilities/IntervalAmbiguitySets.jl | 73 +++++++++++++++++---- src/probabilities/Marginal.jl | 35 ++++++++-- 4 files changed, 93 insertions(+), 28 deletions(-) diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl index e338b01b..868969f4 100644 --- a/src/models/IntervalMarkovChain.jl +++ b/src/models/IntervalMarkovChain.jl @@ -19,11 +19,9 @@ end function 
IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) - state_indices = (1,) - action_indices = (1,) source_dims = (num_sets(ambiguity_set),) action_vars = (1,) - marginal = Marginal(ambiguity_set, state_indices, action_indices, source_dims, action_vars) + marginal = Marginal(ambiguity_set, source_dims, action_vars) return IntervalMarkovChain(marginal, initial_states) end \ No newline at end of file diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index 77d23f60..e6d4ccc7 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -14,16 +14,13 @@ function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySet end function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) - state_indices = (1,) - action_indices = (1,) - if num_sets(ambiguity_set) % num_actions != 0 throw(ArgumentError("The number of sets in the ambiguity set must be a multiple of the number of actions.")) end source_dims = (num_sets(ambiguity_set) ÷ num_actions,) action_vars = (num_actions,) - marginal = Marginal(ambiguity_set, state_indices, action_indices, source_dims, action_vars) + marginal = Marginal(ambiguity_set, source_dims, action_vars) return IntervalMarkovDecisionProcess(marginal, initial_states) end @@ -55,11 +52,9 @@ function interval_prob_hcat( ambiguity_set = IntervalAmbiguitySets(l, g) - stateindices = (1,) - actionindices = (1,) source_dims = (length(ps),) action_vars = (num_actions,) - marginal = Marginal(ambiguity_set, stateindices, actionindices, source_dims, action_vars) + marginal = Marginal(ambiguity_set, source_dims, action_vars) return marginal end \ No newline at end of file diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 23a3a14f..80e21559 100644 --- 
a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -90,32 +90,81 @@ function compute_gap( end function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) - @assert all(lower .>= 0) "The lower bound transition probabilities must be non-negative." - @assert all(gap .>= 0) "The gap transition probabilities must be non-negative." - @assert all(lower .+ gap .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." + if size(lower) != size(gap) + throw(ArgumentError("The lower and gap matrices must have the same size.")) + end + + if any(lower .< 0) + throw(ArgumentError("The lower bound transition probabilities must be non-negative.")) + end + + if any(lower .> 1) + throw(ArgumentError("The lower bound transition probabilities must be less than or equal to 1.")) + end + + if any(gap .< 0) + throw(ArgumentError("The gap transition probabilities must be non-negative.")) + end + + if any(gap .> 1) + throw(ArgumentError("The gap transition probabilities must be less than or equal to 1.")) + end + + if any(lower .+ gap .> 1) + throw(ArgumentError("The sum of lower and gap transition probabilities must be less than or equal to 1.")) + end sum_lower = vec(sum(lower; dims = 1)) max_lower_bound = maximum(sum_lower) - @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." + if max_lower_bound > 1 + throw(ArgumentError("The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1.")) + end - sum_upper = vec(sum(lower + gap; dims = 1)) + sum_upper = sum_lower .+ vec(sum(gap; dims = 1)) max_upper_bound = minimum(sum_upper) - @assert max_upper_bound >= 1 "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1." 
+ if max_upper_bound < 1 + throw(ArgumentError("The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1.")) + end end function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) - @assert all(nonzeros(lower) .>= 0) "The lower bound transition probabilities must be non-negative." - @assert all(nonzeros(gap) .>= 0) "The gap transition probabilities must be non-negative." - @assert all(nonzeros(lower) .+ nonzeros(gap) .<= 1) "The sum of lower and gap transition probabilities must be less than or equal to 1." + if size(lower) != size(gap) + throw(ArgumentError("The lower and gap matrices must have the same size.")) + end + + if any(nonzeros(lower) .< 0) + throw(ArgumentError("The lower bound transition probabilities must be non-negative.")) + end + + if any(nonzeros(lower) .> 1) + throw(ArgumentError("The lower bound transition probabilities must be less than or equal to 1.")) + end + + if any(nonzeros(gap) .< 0) + throw(ArgumentError("The gap transition probabilities must be non-negative.")) + end + + if any(nonzeros(gap) .> 1) + throw(ArgumentError("The gap transition probabilities must be less than or equal to 1.")) + end + + if any(nonzeros(lower) .+ nonzeros(gap) .> 1) + throw(ArgumentError("The sum of lower and gap transition probabilities must be less than or equal to 1.")) + end sum_lower = vec(sum(lower; dims = 1)) max_lower_bound = maximum(sum_lower) - @assert max_lower_bound <= 1 "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1." 
+ if max_lower_bound > 1 + throw(ArgumentError("The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1.")) + end - sum_upper = vec(sum(lower + gap; dims = 1)) + sum_upper = sum_lower .+ vec(sum(gap; dims = 1)) max_upper_bound = minimum(sum_upper) - @assert max_upper_bound >= 1 "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1." + if max_upper_bound < 1 + throw(ArgumentError("The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1.")) + end end + num_target(p::IntervalAmbiguitySets) = size(p.lower, 1) num_sets(p::IntervalAmbiguitySets) = size(p.lower, 2) source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 3f2ec1aa..e8811122 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -39,15 +39,38 @@ function Marginal( return SARectangularMarginal(ambiguity_sets, state_indices_32, action_indices_32, source_dims_32, action_vars_32) end -# Constructor if no state/action indices are given (i.e. only one state and one action variable) +function Marginal(ambiguity_sets::A, source_dims, action_vars) where {A <: AbstractAmbiguitySets} + return Marginal(ambiguity_sets, (1,), (1,), source_dims, action_vars) +end function checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars) - # TODO: More checks incl. consistency with ambiguity sets - @assert all(state_indices .> 0) "State indices must be positive." - @assert all(action_indices .> 0) "Action indices must be positive." 
+ if length(state_indices) != length(source_dims) + throw(ArgumentError("Length of state indices must match length of source dimensions.")) + end + + if length(action_indices) != length(action_vars) + throw(ArgumentError("Length of action indices must match length of action dimensions.")) + end + + if any(state_indices .<= 0) + throw(ArgumentError("State indices must be positive.")) + end - @assert length(state_indices) == length(source_dims) "Length of state indices must match length of source dimensions." - @assert length(action_indices) == length(action_vars) "Length of action indices must match length of action dimensions." + if any(action_indices .<= 0) + throw(ArgumentError("Action indices must be positive.")) + end + + if any(source_dims .<= 0) + throw(ArgumentError("Source dimensions must be positive.")) + end + + if any(action_vars .<= 0) + throw(ArgumentError("Action dimensions must be positive.")) + end + + if prod(source_dims) * prod(action_vars) != num_sets(ambiguity_sets) + throw(ArgumentError("The number of ambiguity sets must match the product of source dimensions and action dimensions.")) + end end ambiguity_sets(p::Marginal) = p.ambiguity_sets From b5f90edc25daa072a1f4399139887acc4cf15165 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 4 Sep 2025 13:55:29 +0200 Subject: [PATCH 07/71] Add and test bellman kernel for products of IntervalAmbiguitySets --- Project.toml | 9 +- src/IntervalMDP.jl | 9 +- src/algorithms.jl | 22 +- src/bellman.jl | 195 +- .../FactoredRobustMarkovDecisionProcess.jl | 26 +- src/probabilities/IntervalAmbiguitySets.jl | 69 +- src/probabilities/Marginal.jl | 4 +- src/robust_value_iteration.jl | 4 +- src/strategy_cache.jl | 14 +- src/workspace.jl | 124 +- test/base/base.jl | 2 +- test/base/bellman.jl | 12 +- test/base/factored.jl | 1698 +++++++++++++++++ test/base/orthogonal.jl | 1369 ------------- test/sparse/bellman.jl | 12 +- 15 files changed, 2101 insertions(+), 1468 deletions(-) create mode 100644 
test/base/factored.jl delete mode 100644 test/base/orthogonal.jl diff --git a/Project.toml b/Project.toml index 98d0c149..cdd0854e 100644 --- a/Project.toml +++ b/Project.toml @@ -4,8 +4,11 @@ authors = ["Frederik Baymler Mathiesen and contributors"] version = "0.6.0" [deps] +Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -16,15 +19,15 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" -# [extensions] -# IntervalMDPCudaExt = ["Adapt", "CUDA", "GPUArrays", "LLVM"] - [compat] Adapt = "4" CUDA = "5.1" +Combinatorics = "1.0.3" CommonSolve = "0.2.4" GPUArrays = "10, 11" +HiGHS = "1.19.0" JSON = "0.21.4" +JuMP = "1.29.0" LLVM = "7, 8, 9" NCDatasets = "0.13, 0.14" julia = "1.9" diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 1787d800..af1fd77b 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -7,6 +7,8 @@ export solve # Import necessary libraries using LinearAlgebra, SparseArrays +using JuMP, HiGHS +using Combinatorics: permutations, Permutations ### Utilities const UnionIndex = Union{<:Integer, <:Tuple} @@ -47,15 +49,16 @@ export value_function, residual, num_iterations include("cuda.jl") ### Solving +include("algorithms.jl") +export OMaximization, LPMcCormickRelaxation +export RobustValueIteration + include("utils.jl") include("threading.jl") include("workspace.jl") include("strategy_cache.jl") include("bellman.jl") -include("algorithms.jl") -export RobustValueIteration - include("robust_value_iteration.jl") ### Saving and loading models diff --git a/src/algorithms.jl b/src/algorithms.jl index 
01a6046f..cdb9445f 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -1,4 +1,15 @@ -abstract type AbstractIntervalMDPAlgorithm end +abstract type BellmanAlgorithm end +struct OMaximization <: BellmanAlgorithm end +Base.@kwdef struct LPMcCormickRelaxation{O} <: BellmanAlgorithm + lp_optimizer::O = HiGHS.Optimizer +end + +default_bellman_algorithm(::IMDP) = OMaximization() +default_bellman_algorithm(::IntervalAmbiguitySets) = OMaximization() +default_bellman_algorithm(::FactoredRMDP{N, M, <:NTuple{N, <:Marginal{<:PolytopicAmbiguitySets}}}) where {N, M} = LPMcCormickRelaxation() +default_bellman_algorithm(pp::ProductProcess) = default_bellman_algorithm(markov_process(pp)) + +abstract type ModelCheckingAlgorithm end ########################## # Robust Value Iteration # @@ -10,19 +21,22 @@ A robust value iteration algorithm for solving interval Markov decision processe This algorithm is designed to handle both finite and infinite time specifications, optimizing for either the maximum or minimum expected value based on the given specification. """ -struct RobustValueIteration <: AbstractIntervalMDPAlgorithm end +struct RobustValueIteration{B <: BellmanAlgorithm} <: ModelCheckingAlgorithm + bellman_alg::B +end +bellman_algorithm(alg::RobustValueIteration) = alg.bellman_alg ############################ # Interval Value Iteration # ############################ # TODO: Provide implementation for this algorithm. When provided, consider changing the default algorithm. -struct IntervalValueIteration <: AbstractIntervalMDPAlgorithm end +struct IntervalValueIteration <: ModelCheckingAlgorithm end # TODO: Consider topological value iteration as an alternative algorithm (infinite time only). 
##### Default algorithm for solving Interval MDP problems -default_algorithm(::AbstractIntervalMDPProblem) = RobustValueIteration() +default_algorithm(problem::AbstractIntervalMDPProblem) = RobustValueIteration(default_bellman_algorithm(system(problem))) solve(problem::AbstractIntervalMDPProblem; kwargs...) = solve(problem, default_algorithm(problem); kwargs...) diff --git a/src/bellman.jl b/src/bellman.jl index 58dda842..dd02b1c6 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -61,10 +61,10 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) [1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. """ -function bellman(V, model; upper_bound = false, maximize = true) +function bellman(V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) Vres = similar(V, source_shape(model)) - return bellman!(Vres, V, model; upper_bound = upper_bound, maximize = maximize) + return bellman!(Vres, V, model, alg; upper_bound = upper_bound, maximize = maximize) end """ @@ -137,8 +137,8 @@ Vres = IntervalMDP.bellman!(workspace, strategy_cache, Vres, V, model; upper_bou """ function bellman! 
end -function bellman!(Vres, V, model; upper_bound = false, maximize = true) - workspace = construct_workspace(model) +function bellman!(Vres, V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) + workspace = construct_workspace(model, alg) strategy_cache = construct_strategy_cache(model) return bellman!( @@ -244,7 +244,7 @@ end # Non-threaded function _bellman_helper!( - workspace::Union{DenseIntervalWorkspace, SparseIntervalWorkspace}, + workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, strategy_cache::AbstractStrategyCache, Vres, V, @@ -274,7 +274,7 @@ end # Threaded function _bellman_helper!( - workspace::Union{ThreadedDenseIntervalWorkspace, ThreadedSparseIntervalWorkspace}, + workspace::Union{ThreadedDenseIntervalOMaxWorkspace, ThreadedSparseIntervalOMaxWorkspace}, strategy_cache::AbstractStrategyCache, Vres, V, @@ -304,7 +304,7 @@ function _bellman_helper!( end function bellman_precomputation!( - workspace::Union{DenseIntervalWorkspace, ThreadedDenseIntervalWorkspace}, + workspace::Union{DenseIntervalOMaxWorkspace, ThreadedDenseIntervalOMaxWorkspace}, V, upper_bound, ) @@ -313,13 +313,13 @@ function bellman_precomputation!( end bellman_precomputation!( - workspace::Union{SparseIntervalWorkspace, ThreadedSparseIntervalWorkspace}, + workspace::Union{SparseIntervalOMaxWorkspace, ThreadedSparseIntervalOMaxWorkspace}, V, upper_bound, ) = nothing function state_bellman!( - workspace::Union{DenseIntervalWorkspace, SparseIntervalWorkspace}, + workspace::IMDPWorkspace, strategy_cache::OptimizingStrategyCache, Vres, V, @@ -335,12 +335,12 @@ function state_bellman!( workspace.actions[jₐ] = state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end - Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, action_shape(marginal), maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) end end function state_bellman!( - 
workspace::Union{DenseIntervalWorkspace, SparseIntervalWorkspace}, + workspace::IMDPWorkspace, strategy_cache::NonOptimizingStrategyCache, Vres, V, @@ -358,7 +358,7 @@ function state_bellman!( end Base.@propagate_inbounds function state_action_bellman( - workspace::DenseIntervalWorkspace, + workspace::DenseIntervalOMaxWorkspace, V, ambiguity_set, budget, @@ -393,7 +393,7 @@ Base.@propagate_inbounds function gap_value( end Base.@propagate_inbounds function state_action_bellman( - workspace::SparseIntervalWorkspace, + workspace::SparseIntervalOMaxWorkspace, V, ambiguity_set, budget, @@ -430,6 +430,175 @@ Base.@propagate_inbounds function gap_value( return res end +################################################## +# McCormick relaxation-based Bellman over fRMDPs # +################################################## + +# Non-threaded +function _bellman_helper!( + workspace::FactoredIntervalMcCormickWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + model; + upper_bound = false, + maximize = true, +) + for jₛ in CartesianIndices(source_shape(model)) + state_bellman!( + workspace, + strategy_cache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, + ) + end + + return Vres +end + +# Threaded +function _bellman_helper!( + workspace::ThreadedFactoredIntervalMcCormickWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + model; + upper_bound = false, + maximize = true, +) + @threadstid tid for jₛ in CartesianIndices(source_shape(model)) + @inbounds ws = workspace[tid] + state_bellman!( + ws, + strategy_cache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, + ) + end + + return Vres +end + + +function state_bellman!( + workspace::FactoredIntervalMcCormickWorkspace, + strategy_cache::OptimizingStrategyCache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, +) + @inbounds begin + for jₐ in CartesianIndices(action_shape(model)) + ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) + workspace.actions[jₐ] = 
state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + end + + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + end +end + +function state_bellman!( + workspace::FactoredIntervalMcCormickWorkspace, + strategy_cache::NonOptimizingStrategyCache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, +) + @inbounds begin + jₐ = CartesianIndex(strategy_cache[jₛ]) + ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) + Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + end +end + +Base.@propagate_inbounds function state_action_bellman( + workspace::FactoredIntervalMcCormickWorkspace, + V::AbstractArray{R}, + ambiguity_sets, + upper_bound, +) where {R} + V = @view V[map(support, ambiguity_sets)...] + + model = workspace.model + JuMP.empty!(model) + + # Recursively add McCormick variables and constraints for each ambiguity set + p, _, _ = mccormick_branch(model, ambiguity_sets) + + if upper_bound + @objective(model, Max, sum(V[I] * p[I] for I in CartesianIndices(p))) + else + @objective(model, Min, sum(V[I] * p[I] for I in CartesianIndices(p))) + end + + JuMP.optimize!(model) + return JuMP.objective_value(model) +end + +function marginal_lp_constraints(model, ambiguity_set::IntervalAmbiguitySet{R}) where {R} + p = @variable(model, [1:length(support(ambiguity_set))]) + p_lower = map(i -> lower(ambiguity_set, i), support(ambiguity_set)) + p_upper = map(i -> upper(ambiguity_set, i), support(ambiguity_set)) + for i in eachindex(p) + set_lower_bound(p[i], p_lower[i]) + set_upper_bound(p[i], p_upper[i]) + end + @constraint(model, sum(p) == one(R)) + + return p, p_lower, p_upper +end + +function mccormick_branch(model, ambiguity_sets) + if length(ambiguity_sets) == 1 + return marginal_lp_constraints(model, ambiguity_sets[1]) + else + if length(ambiguity_sets) == 2 + p, p_lower, p_upper = marginal_lp_constraints(model, ambiguity_sets[1]) + q, q_lower, q_upper = marginal_lp_constraints(model, 
ambiguity_sets[2]) + else + mid = fld(length(ambiguity_sets), 2) + 1 + p, p_lower, p_upper = mccormick_branch(model, ambiguity_sets[1:mid]) + q, q_lower, q_upper = mccormick_branch(model, ambiguity_sets[mid+1:end]) + end + + # McCormick envelopes + sizes = (size(p)..., size(q)...) + w = Array{VariableRef}(undef, sizes) + w_lower = Array{eltype(p_lower)}(undef, sizes) + w_upper = Array{eltype(p_upper)}(undef, sizes) + for J in CartesianIndices(q) + for I in CartesianIndices(p) + w_lower[I, J] = p_lower[I] * q_lower[J] + w_upper[I, J] = p_upper[I] * q_upper[J] + + w[I, J] = @variable(model, lower_bound = w_lower[I, J], upper_bound = w_upper[I, J]) + @constraint(model, w[I, J] >= p[I] * q_lower[J] + q[J] * p_lower[I] − p_lower[I] * q_lower[J]) + @constraint(model, w[I, J] >= p[I] * q_upper[J] + q[J] * p_upper[I] − p_upper[I] * q_upper[J]) + @constraint(model, w[I, J] <= p[I] * q_upper[J] + q[J] * p_lower[I] − p_lower[I] * q_upper[J]) + @constraint(model, w[I, J] <= p[I] * q_lower[J] + q[J] * p_upper[I] − p_upper[I] * q_lower[J]) + end + end + @constraint(model, sum(w) == one(eltype(p_lower))) + + return w, w_lower, w_upper + end +end + + # ################################################################ # # Bellman operator for OrthogonalIntervalMarkovDecisionProcess # # ################################################################ diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index 1104bfa1..1bddfd90 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -30,12 +30,12 @@ const FactoredIMDP{N, M} = FactoredRMDP{N, M, P} where {P <: NTuple{N, <:Margina const IMDP{M} = FactoredIMDP{1, M} function FactoredRMDP( - state_vars::NTuple{N, Int}, - action_vars::NTuple{M, Int}, - source_dims::NTuple{N, Int}, - transition::P, + state_vars::NTuple{N, <:Integer}, + action_vars::NTuple{M, <:Integer}, + source_dims::NTuple{N, <:Integer}, + 
transition::NTuple{N, <:AbstractMarginal}, initial_states::VI = AllStates(), -) where {N, M, P <: NTuple{N, <:AbstractMarginal}, VI <: InitialStates} +) where {N, M, VI <: InitialStates} state_vars_32 = Int32.(state_vars) action_vars_32 = Int32.(action_vars) source_dims_32 = Int32.(source_dims) @@ -46,10 +46,10 @@ end function FactoredRMDP( state_vars::NTuple{N, <:Integer}, action_vars::NTuple{M, <:Integer}, - transition::P, + transition::NTuple{N, <:AbstractMarginal}, initial_states::VI = AllStates(), -) where {N, M, P <: NTuple{N, <:AbstractMarginal}, VI <: InitialStates} - return FactoredRobustMarkovDecisionProcess{N, M, P, VI}(state_vars, action_vars, state_vars, transition, initial_states) +) where {N, M, VI <: InitialStates} + return FactoredRobustMarkovDecisionProcess(state_vars, action_vars, state_vars, transition, initial_states) end function check_rmdp(state_vars, action_vars, source_dims, transition, initial_states) @@ -81,12 +81,14 @@ function check_transition(state_dims, action_dims, source_dims, transition) throw(DimensionMismatch("Marginal $i has incorrect number of target states. Expected $(state_dims[i]), got $(num_target(marginal)).")) end - if source_shape(marginal) != getindex.(Tuple(source_dims), state_variables(marginal)) # source_dims[state_variables(marginal)] - throw(DimensionMismatch("Marginal $i has incorrect source shape. Expected $source_dims, got $(source_shape(marginal)).")) + expected_source_shape = getindex.((source_dims,), state_variables(marginal)) + if source_shape(marginal) != expected_source_shape + throw(DimensionMismatch("Marginal $i has incorrect source shape. Expected $expected_source_shape, got $(source_shape(marginal)).")) end - if action_shape(marginal) != getindex.(Tuple(action_dims), action_variables(marginal)) - throw(DimensionMismatch("Marginal $i has incorrect action shape. 
Expected $action_dims, got $(action_shape(marginal)).")) + expected_action_shape = getindex.((action_dims,), action_variables(marginal)) + if action_shape(marginal) != expected_action_shape + throw(DimensionMismatch("Marginal $i has incorrect action shape. Expected $expected_action_shape, got $(action_shape(marginal)).")) end end end diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 80e21559..72803611 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -203,6 +203,8 @@ struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}} <: PolytopicAmbiguitySet gap::VR end +num_target(p::IntervalAmbiguitySet) = length(p.lower) + lower(p::IntervalAmbiguitySet) = p.lower lower(p::IntervalAmbiguitySet, destination) = p.lower[destination] @@ -217,4 +219,69 @@ support(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = eachindex(p.gap const SparseColumnView{Tv, Ti} = SubArray{Tv, 1, <:SparseArrays.AbstractSparseMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} support(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = rowvals(p.gap) -SparseArrays.nnz(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) \ No newline at end of file +SparseArrays.nnz(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) + +# Vertex iterator for IntervalAmbiguitySet +struct IntervalAmbiguitySetVertexIterator{R, VR <: AbstractVector{R}, P <: Permutations} + set::IntervalAmbiguitySet{R, VR} + perm::P +end + +function IntervalAmbiguitySetVertexIterator(set::IntervalAmbiguitySet) + perm = permutations(support(set)) + return IntervalAmbiguitySetVertexIterator(set, perm) +end +Base.IteratorEltype(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasEltype() +Base.eltype(::IntervalAmbiguitySetVertexIterator{R}) where {R} = Vector{R} +Base.IteratorSize(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasLength() 
+Base.length(it::IntervalAmbiguitySetVertexIterator) = length(it.perm) + +function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}) where {R, VR <: AbstractVector{R}} + res = iterate(it.perm) + + if isnothing(res) + throw(ArgumentError("The iterator is empty.")) + end + + (permutation, state) = res + + v = copy(lower(it.set)) + budget = 1.0 - sum(v) + for i in permutation + if budget <= gap(it.set, i) + v[i] += budget + break + else + v[i] += gap(it.set, i) + budget -= gap(it.set, i) + end + end + + return v, state +end + +function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) where {R, VR <: AbstractVector{R}} + res = iterate(it.perm, state) + + if isnothing(res) + return nothing + end + + (permutation, state) = res + v = copy(lower(it.set)) + budget = 1.0 - sum(v) + for i in permutation + if budget <= gap(it.set, i) + v[i] += budget + break + else + v[i] += gap(it.set, i) + budget -= gap(it.set, i) + end + end + + return v, state +end + +vertex_generator(p::IntervalAmbiguitySet) = IntervalAmbiguitySetVertexIterator(p) +vertices(p::IntervalAmbiguitySet) = collect(vertex_generator(p)) \ No newline at end of file diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index e8811122..1c0087c8 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -86,8 +86,8 @@ end sub2ind(p::Marginal, action::CartesianIndex, source::CartesianIndex) = sub2ind(p, Tuple(action), Tuple(source)) function sub2ind(p::Marginal, action::NTuple{M, <:Integer}, source::NTuple{N, <:Integer}) where {N, M} - action = getindex.(Tuple(action), p.action_indices) - source = getindex.(Tuple(source), p.state_indices) + action = getindex.((action,), p.action_indices) + source = getindex.((source,), p.state_indices) j = p.linear_index[action..., source...] 
return j diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index f4346d64..3248e3d7 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -19,7 +19,7 @@ termination_criteria(prop, finitetime::Val{false}) = CovergenceCriteria(convergence_eps(prop)) """ - solve(problem::AbstractIntervalMDPAlgorithm, alg::RobustValueIteration; callback=nothing) + solve(problem::AbstractIntervalMDPProblem, alg::RobustValueIteration; callback=nothing) Solve minimizes/maximizes optimistic/pessimistic specification problems using value iteration for interval Markov processes. @@ -102,7 +102,7 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback = maximize = ismaximize(spec) # It is more efficient to use allocate first and reuse across iterations - workspace = construct_workspace(mp) + workspace = construct_workspace(mp, bellman_algorithm(alg)) strategy_cache = construct_strategy_cache(problem) value_function = ValueFunction(problem) diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index 8997af06..391e0e6e 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -25,7 +25,7 @@ end construct_strategy_cache(::VerificationProblem{S, F, <:NoStrategy}) where {S, F} = NoStrategyCache() -function extract_strategy!(::NoStrategyCache, values, V, j, action_shape, maximize) +function extract_strategy!(::NoStrategyCache, values, V, j, maximize) return maximize ? maximum(values) : minimum(values) end step_postprocess_strategy_cache!(::NoStrategyCache) = nothing @@ -77,14 +77,13 @@ function extract_strategy!( values::AbstractArray{R}, V, jₛ, - action_shape, maximize, ) where {R <: Real} opt_val = maximize ? 
typemin(R) : typemax(R) - opt_index = ntuple(_ -> 1, length(action_shape)) + opt_index = ntuple(_ -> 1, ndims(values)) neutral = (opt_val, opt_index) - return _extract_strategy!(strategy_cache.cur_strategy, values, neutral, jₛ, action_shape, maximize) + return _extract_strategy!(strategy_cache.cur_strategy, values, neutral, jₛ, maximize) end function step_postprocess_strategy_cache!(strategy_cache::TimeVaryingStrategyCache) push!(strategy_cache.strategy, copy(strategy_cache.cur_strategy)) @@ -113,7 +112,6 @@ function extract_strategy!( values::AbstractArray{R}, V, jₛ, - action_shape, maximize, ) where {R <: Real} neutral = if all(iszero.(strategy_cache.strategy[jₛ])) @@ -122,17 +120,17 @@ function extract_strategy!( V[jₛ], strategy_cache.strategy[jₛ] end - return _extract_strategy!(strategy_cache.strategy, values, neutral, jₛ, action_shape, maximize) + return _extract_strategy!(strategy_cache.strategy, values, neutral, jₛ, maximize) end step_postprocess_strategy_cache!(::StationaryStrategyCache) = nothing # Shared between stationary and time-varying strategies -function _extract_strategy!(cur_strategy, values, neutral, jₛ, action_shape, maximize) +function _extract_strategy!(cur_strategy, values, neutral, jₛ, maximize) gt = maximize ? (>) : (<) opt_val, opt_index = neutral - for jₐ in CartesianIndices(action_shape) + for jₐ in CartesianIndices(values) v = values[jₐ] if gt(v, opt_val) opt_val = v diff --git a/src/workspace.jl b/src/workspace.jl index f61f7b17..817a981f 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -16,129 +16,177 @@ struct ProductWorkspace{W, MT <: AbstractArray} intermediate_values::MT end -function construct_workspace(proc::ProductProcess) +function construct_workspace(proc::ProductProcess, alg; kwargs...) mp = markov_process(proc) - underlying_workspace = construct_workspace(mp) + underlying_workspace = construct_workspace(mp, alg; kwargs...) 
intermediate_values = arrayfactory(mp, valuetype(mp), state_variables(mp)) return ProductWorkspace(underlying_workspace, intermediate_values) end +abstract type IMDPWorkspace end + # Dense -struct DenseIntervalWorkspace{T <: Real} +struct DenseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace budget::Vector{T} scratch::Vector{Int32} permutation::Vector{Int32} actions::Vector{T} end -function DenseIntervalWorkspace(ambiguity_set::IntervalAmbiguitySets{R}, nactions) where {R <: Real} +function DenseIntervalOMaxWorkspace(ambiguity_set::IntervalAmbiguitySets{R}, nactions) where {R <: Real} budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1)) scratch = Vector{Int32}(undef, num_target(ambiguity_set)) perm = Vector{Int32}(undef, num_target(ambiguity_set)) actions = Vector{R}(undef, nactions) - return DenseIntervalWorkspace(budget, scratch, perm, actions) + return DenseIntervalOMaxWorkspace(budget, scratch, perm, actions) end -permutation(ws::DenseIntervalWorkspace) = ws.permutation -scratch(ws::DenseIntervalWorkspace) = ws.scratch +permutation(ws::DenseIntervalOMaxWorkspace) = ws.permutation +scratch(ws::DenseIntervalOMaxWorkspace) = ws.scratch -struct ThreadedDenseIntervalWorkspace{T <: Real} - thread_workspaces::Vector{DenseIntervalWorkspace{T}} +struct ThreadedDenseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace + thread_workspaces::Vector{DenseIntervalOMaxWorkspace{T}} end -function ThreadedDenseIntervalWorkspace(ambiguity_set::IntervalAmbiguitySets{R}, nactions) where {R <: Real} +function ThreadedDenseIntervalOMaxWorkspace(ambiguity_set::IntervalAmbiguitySets{R}, nactions) where {R <: Real} budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1)) scratch = Vector{Int32}(undef, num_target(ambiguity_set)) perm = Vector{Int32}(undef, num_target(ambiguity_set)) workspaces = [ - DenseIntervalWorkspace(budget, scratch, perm, Vector{R}(undef, nactions)) for + DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions)) for _ in 1:Threads.nthreads() ] - 
return ThreadedDenseIntervalWorkspace(workspaces) + return ThreadedDenseIntervalOMaxWorkspace(workspaces) end -Base.getindex(ws::ThreadedDenseIntervalWorkspace, i) = ws.thread_workspaces[i] +Base.getindex(ws::ThreadedDenseIntervalOMaxWorkspace, i) = ws.thread_workspaces[i] ## permutation and scratch space is shared across threads -permutation(ws::ThreadedDenseIntervalWorkspace) = permutation(first(ws.thread_workspaces)) -scratch(ws::ThreadedDenseIntervalWorkspace) = scratch(first(ws.thread_workspaces)) +permutation(ws::ThreadedDenseIntervalOMaxWorkspace) = permutation(first(ws.thread_workspaces)) +scratch(ws::ThreadedDenseIntervalOMaxWorkspace) = scratch(first(ws.thread_workspaces)) function construct_workspace( - prob::IntervalAmbiguitySets{R, MR}; - threshold = 10, + prob::IntervalAmbiguitySets{R, MR}, + ::OMaximization; + threshold = 10, kwargs... ) where {R, MR <: AbstractMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold - return DenseIntervalWorkspace(prob, 1) + return DenseIntervalOMaxWorkspace(prob, 1) else - return ThreadedDenseIntervalWorkspace(prob, 1) + return ThreadedDenseIntervalOMaxWorkspace(prob, 1) end end function construct_workspace( - sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}; + sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}, + ::OMaximization; threshold = 10, + kwargs... 
) where {N, M, R, MR <: AbstractMatrix{R}} prob = sys.transition[1].ambiguity_sets if Threads.nthreads() == 1 || num_states(sys) <= threshold - return DenseIntervalWorkspace(prob, num_actions(sys)) + return DenseIntervalOMaxWorkspace(prob, num_actions(sys)) else - return ThreadedDenseIntervalWorkspace(prob, num_actions(sys)) + return ThreadedDenseIntervalOMaxWorkspace(prob, num_actions(sys)) end end # Sparse -struct SparseIntervalWorkspace{T <: Real} +struct SparseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace budget::Vector{T} scratch::Vector{Tuple{T, T}} values_gaps::Vector{Tuple{T, T}} actions::Vector{T} end -function SparseIntervalWorkspace(ambiguity_sets::IntervalAmbiguitySets{R}, nactions) where {R <: Real} +function SparseIntervalOMaxWorkspace(ambiguity_sets::IntervalAmbiguitySets{R}, nactions) where {R <: Real} max_support = maximum(nnz, ambiguity_sets) budget = 1 .- vec(sum(ambiguity_sets.lower; dims = 1)) scratch = Vector{Tuple{R, R}}(undef, max_support) values_gaps = Vector{Tuple{R, R}}(undef, max_support) actions = Vector{R}(undef, nactions) - return SparseIntervalWorkspace(budget, scratch, values_gaps, actions) + return SparseIntervalOMaxWorkspace(budget, scratch, values_gaps, actions) end -scratch(ws::SparseIntervalWorkspace) = ws.scratch +scratch(ws::SparseIntervalOMaxWorkspace) = ws.scratch -struct ThreadedSparseIntervalWorkspace{T} - thread_workspaces::Vector{SparseIntervalWorkspace{T}} +struct ThreadedSparseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace + thread_workspaces::Vector{SparseIntervalOMaxWorkspace{T}} end -function ThreadedSparseIntervalWorkspace(ambiguity_sets::IntervalAmbiguitySets, nactions) +function ThreadedSparseIntervalOMaxWorkspace(ambiguity_sets::IntervalAmbiguitySets, nactions) nthreads = Threads.nthreads() - thread_workspaces = [SparseIntervalWorkspace(ambiguity_sets, nactions) for _ in 1:nthreads] - return ThreadedSparseIntervalWorkspace(thread_workspaces) + thread_workspaces = 
[SparseIntervalOMaxWorkspace(ambiguity_sets, nactions) for _ in 1:nthreads] + return ThreadedSparseIntervalOMaxWorkspace(thread_workspaces) end -Base.getindex(ws::ThreadedSparseIntervalWorkspace, i) = ws.thread_workspaces[i] +Base.getindex(ws::ThreadedSparseIntervalOMaxWorkspace, i) = ws.thread_workspaces[i] function construct_workspace( - prob::IntervalAmbiguitySets{R, MR}; + prob::IntervalAmbiguitySets{R, MR}, + ::OMaximization; threshold = 10, + kwargs... ) where {R, MR <: AbstractSparseMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold - return SparseIntervalWorkspace(prob, 1) + return SparseIntervalOMaxWorkspace(prob, 1) else - return ThreadedSparseIntervalWorkspace(prob, 1) + return ThreadedSparseIntervalOMaxWorkspace(prob, 1) end end function construct_workspace( - sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}; + sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}, + ::OMaximization; threshold = 10, ) where {N, M, R, MR <: AbstractSparseMatrix{R}} prob = sys.transition[1].ambiguity_sets if Threads.nthreads() == 1 || num_states(sys) <= threshold - return SparseIntervalWorkspace(prob, num_actions(sys)) + return SparseIntervalOMaxWorkspace(prob, num_actions(sys)) else - return ThreadedSparseIntervalWorkspace(prob, num_actions(sys)) + return ThreadedSparseIntervalOMaxWorkspace(prob, num_actions(sys)) end end + +# Factored interval McCormick workspace +struct FactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: AbstractArray{T}} + model::M + actions::AT +end + +function FactoredIntervalMcCormickWorkspace(sys, alg) + model = JuMP.Model(alg.lp_optimizer) + JuMP.set_silent(model) + set_string_names_on_creation(model, false) + + actions = Array{valuetype(sys)}(undef, action_shape(sys)) + + return FactoredIntervalMcCormickWorkspace(model, actions) +end + +struct ThreadedFactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: AbstractArray{T}} <: IMDPWorkspace + 
thread_workspaces::Vector{FactoredIntervalMcCormickWorkspace{M, T, AT}} +end + +function ThreadedFactoredIntervalMcCormickWorkspace(sys, alg) + nthreads = Threads.nthreads() + thread_workspaces = [FactoredIntervalMcCormickWorkspace(sys, alg) for _ in 1:nthreads] + return ThreadedFactoredIntervalMcCormickWorkspace(thread_workspaces) +end +Base.getindex(ws::ThreadedFactoredIntervalMcCormickWorkspace, i) = ws.thread_workspaces[i] + +function construct_workspace( + sys::FactoredRMDP{N, M, <:NTuple{N, <:Marginal{<:IntervalAmbiguitySets}}}, + alg::LPMcCormickRelaxation; + threshold = 10, + kwargs... +) where {N, M} + if Threads.nthreads() == 1 || num_states(sys) <= threshold + return FactoredIntervalMcCormickWorkspace(sys, alg) + else + return ThreadedFactoredIntervalMcCormickWorkspace(sys, alg) + end +end \ No newline at end of file diff --git a/test/base/base.jl b/test/base/base.jl index b02ed5f5..63bd9d4e 100644 --- a/test/base/base.jl +++ b/test/base/base.jl @@ -5,7 +5,7 @@ test_files = [ "imdp.jl", "synthesis.jl", "specification.jl", - # "orthogonal.jl", + "factored.jl", # "mixture.jl", "labelling.jl", "dfa.jl", diff --git a/test/base/bellman.jl b/test/base/bellman.jl index de72e517..e66e3eeb 100644 --- a/test/base/bellman.jl +++ b/test/base/bellman.jl @@ -12,7 +12,7 @@ for N in [Float32, Float64, Rational{BigInt}] #### Maximization @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob) + ws = IntervalMDP.construct_workspace(prob, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) IntervalMDP._bellman_helper!( @@ -25,7 +25,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - ws = IntervalMDP.DenseIntervalWorkspace(prob, 1) + ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -38,7 +38,7 @@ for N in 
[Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[27 // 10, 17 // 10] - ws = IntervalMDP.ThreadedDenseIntervalWorkspace(prob, 1) + ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -54,7 +54,7 @@ for N in [Float32, Float64, Rational{BigInt}] #### Minimization @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob) + ws = IntervalMDP.construct_workspace(prob, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) IntervalMDP._bellman_helper!( @@ -67,7 +67,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] - ws = IntervalMDP.DenseIntervalWorkspace(prob, 1) + ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -80,7 +80,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[17 // 10, 15 // 10] - ws = IntervalMDP.ThreadedDenseIntervalWorkspace(prob, 1) + ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( diff --git a/test/base/factored.jl b/test/base/factored.jl new file mode 100644 index 00000000..5f862fef --- /dev/null +++ b/test/base/factored.jl @@ -0,0 +1,1698 @@ +using Revise, Test +using IntervalMDP +using Random: MersenneTwister + +for N in [Float32, Float64] + @testset "N = $N" begin + @testset "bellman 1d" begin + ambiguity_sets = IntervalAmbiguitySets(; + lower = N[ + 0 5//10 2//10 + 1//10 3//10 3//10 + 2//10 1//10 5//10 + ], + upper = N[ + 5//10 7//10 3//10 + 6//10 5//10 4//10 + 7//10 3//10 5//10 + ], + ) + imc = IntervalMarkovChain(ambiguity_sets) + + V = N[1, 2, 3] + + @testset "maximization" begin + Vexpected = N[27 // 10, 17 // 
10, 23 // 10] + + ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + end + + @testset "minimization" begin + Vexpected = N[17 // 10, 15 // 10, 23 // 10] + + ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + end + end + + @testset "bellman 2d" begin + state_indices = (1, 2) + action_indices = (1,) + state_vars = (2, 3) + action_vars = (1,) + + marginal1 = 
Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + + V = N[ + 3 13 18 + 12 16 8 + ] + + #### Maximization + @testset "maximization" begin + V_vertex = N[ + 14.346666666666664 14.263333333333334 11.133333333333336 + 12.341111111111111 13.74333333333333 13.444444444444443 + ] + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = N[ + 9.775555555555554 8.200000000000001 10.844444444444443 + 10.33 10.86 10.027777777777777 + ] + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ?
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + # @testset "bellman 2d partial dependence" begin + # state_indices = (1, 2) + # action_indices = (1,) + # state_vars = (2, 3) + # action_vars = (1,) + + # marginal1 = Marginal(IntervalAmbiguitySets(; + # lower = N[ + # 1//15 7//30 1//15 13//30 4//15 1//6 + # 2//5 7//30 1//30 11//30 2//15 1//10 + # ], + # upper = N[ + # 17//30 7//10 2//3 4//5 7//10 2//3 + # 9//10 13//15 9//10 5//6 4//5 14//15 + # ] + # ), state_indices, action_indices, state_vars, action_vars) + + # marginal2 = Marginal(IntervalAmbiguitySets(; + # lower = N[ + # 1//30 1//3 1//6 + # 4//15 1//4 1//6 + # 2//15 7//30 1//10 + # ], + # upper = N[ + # 2//3 7//15 4//5 + # 23//30 4//5 23//30 + # 7//15 4//5 23//30 + # ] + # ), (2,), action_indices, (3,), action_vars) + + # mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + + # V = N[ + # 3 13 18 + # 12 16 8 + # ] + + # #### Maximization + # @testset "maximization" begin + # V_vertex = N[ + # 14.346666666666664 14.263333333333334 11.133333333333336 + # 12.341111111111111 13.74333333333333 13.444444444444443 + # ] + + # ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + # Vres_first = zeros(N, 2, 3) + # 
IntervalMDP.bellman!( + # ws, + # strategy_cache, + # Vres_first, + # V, + # mdp; + # upper_bound = true, + # ) + + # epsilon = N == Float32 ? 1e-5 : 1e-8 + # @test all(Vres_first .>= 0.0) + # @test all(Vres_first .<= maximum(V)) + # @test all(Vres_first .+ epsilon .>= V_vertex) + + # ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + # Vres = similar(Vres_first) + # IntervalMDP.bellman!( + # ws, + # strategy_cache, + # Vres, + # V, + # mdp; + # upper_bound = true, + # ) + # @test Vres ≈ Vres_first + + # ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + # Vres = similar(Vres_first) + # IntervalMDP.bellman!( + # ws, + # strategy_cache, + # Vres, + # V, + # mdp; + # upper_bound = true, + # ) + # @test Vres ≈ Vres_first + # end + + # #### Minimization + # @testset "minimization" begin + # V_vertex = N[ + # 9.775555555555554 8.200000000000001 10.844444444444443 + # 10.33 10.86 10.027777777777777 + # ] + + # ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + # Vres_first = zeros(N, 2, 3) + # IntervalMDP._bellman_helper!( + # ws, + # strategy_cache, + # Vres_first, + # V, + # mdp; + # upper_bound = false, + # ) + + # epsilon = N == Float32 ? 
1e-5 : 1e-8 + # @test all(Vres_first .>= 0.0) + # @test all(Vres_first .<= maximum(V)) + # @test all(Vres_first .- epsilon .<= V_vertex) + + # ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + # Vres = similar(Vres_first) + # IntervalMDP.bellman!( + # ws, + # strategy_cache, + # Vres, + # V, + # mdp; + # upper_bound = false, + # ) + # @test Vres ≈ Vres_first + + # ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + # Vres = similar(Vres_first) + # IntervalMDP.bellman!( + # ws, + # strategy_cache, + # Vres, + # V, + # mdp; + # upper_bound = false, + # ) + # @test Vres ≈ Vres_first + # end + # end + + @testset "bellman 3d" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + action_vars = (1,) + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ] + ), state_indices, 
action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 + 3//5 
2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + V = N[ + 23, + 27, + 16, + 6, + 26, + 17, + 12, + 9, + 8, + 22, + 1, + 21, + 11, + 24, + 4, + 10, + 13, + 19, + 3, + 14, + 25, + 20, + 18, + 7, + 5, + 15, + 2, + ] + V = reshape(V, 3, 3, 3) + + #### Maximization + @testset "maximization" begin + V_vertex = N[ + 16.19533333333333, + 15.225999999999996, + 17.999333333333325, + 15.795888888888884, + 17.75407407407408, + 14.759111111111114, + 16.94551851851852, + 15.592148148148148, + 15.816333333333333, + 15.059555555555557, + 16.611333333333334, + 16.774814814814814, + 18.133333333333333, + 17.964999999999996, + 17.491666666666664, + 15.506666666666668, + 16.986962962962956, + 14.952518518518515, + 18.215555555555554, + 16.101592592592596, + 17.483888888888895, + 17.05688888888889, + 16.394444444444442, + 16.340666666666667, + 16.880444444444446, + 16.045185185185186, + 16.494074074074074, + ] + V_vertex = reshape(V_vertex, (3, 3, 3)) + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = N[ + 10.856370370370371, + 11.341333333333333, + 12.446333333333333, + 11.282999999999996, + 12.351592592592592, + 12.094370370370369, + 9.957037037037036, + 9.98859259259259, + 12.769888888888888, + 12.433333333333332, + 12.526444444444442, + 11.579851851851851, + 13.808888888888886, + 11.394074074074076, + 13.171555555555557, + 12.11111111111111, + 12.080148148148147, + 11.569777777777778, + 13.288888888888888, + 11.581629629629631, + 11.545259259259257, + 10.001851851851853, + 11.602074074074073, + 12.530444444444445, + 12.070666666666666, + 10.503851851851852, + 12.760740740740742, + ] + V_vertex = reshape(V_vertex, (3, 3, 3)) + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ?
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "implicit sink state" begin + @testset "first dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (2, 3, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 + ], + upper = N[ + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 
1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 + ], + upper = N[ + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ], + upper = N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 
4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + 
] + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + @testset "second dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 2, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 
1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 
1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + 
prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + @testset "last dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 3, 2) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 
13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 
2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = 
VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + end + end +end + +for N in [Float32, Float64] + @testset "N = $N" begin + # 4-D abstraction + @testset "4D abstraction" begin + rng = MersenneTwister(995) + + prob_lower = [rand(rng, N, 3, 81) ./ N(3) for _ in 1:4] + prob_upper = [(rand(rng, N, 3, 81) .+ N(1)) ./ N(3) for _ in 1:4] + + ambiguity_sets = ntuple( + i -> IntervalAmbiguitySets(; + lower = prob_lower[i], + upper = prob_upper[i], + ), + 4, + ) + + marginals = ntuple( + i -> Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 3, 3), (1,)), + 4, + ) + + mdp = FactoredRobustMarkovDecisionProcess((3, 3, 3, 3), (1,), marginals) + + prop = FiniteTimeReachability([(3, 3, 3, 3)], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + + V_ortho, it_ortho, res_ortho = solve(prob) + + @test V_ortho[3, 3, 3, 3] ≈ one(N) + @test all(V_ortho .>= zero(N)) + @test all(V_ortho .<= one(N)) + + # Test against the naive construction + prob_lower_simple = zeros(N, 81, 81) + prob_upper_simple = zeros(N, 81, 81) + + lin = LinearIndices((3, 3, 3, 3)) + act_idx = CartesianIndex(1) + for I in CartesianIndices((3, 3, 3, 3)) + for J in CartesianIndices((3, 3, 3, 3)) + marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) + + prob_lower_simple[lin[J], lin[I]] = prod( + lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) + + prob_upper_simple[lin[J], lin[I]] = prod( + upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) + end + end + + ambiguity_set = IntervalAmbiguitySets(; + lower = prob_lower_simple, + upper = prob_upper_simple, + ) + + imc = IntervalMarkovChain(ambiguity_set) + + prop = FiniteTimeReachability([81], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(imc, spec) + + V_direct, it_direct, res_direct = 
solve(prob) + @test V_direct[81] ≈ one(N) + @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) + end + + @testset "synthesis" begin + rng = MersenneTwister(3286) + + num_states_per_axis = 3 + num_axis = 3 + num_states = num_states_per_axis^num_axis + num_actions = 2 + num_choices = num_states * num_actions + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = ntuple(_ -> num_states_per_axis, num_axis) + action_vars = (num_actions,) + + prob_lower = [ + rand(rng, N, num_states_per_axis, num_choices) ./ num_states_per_axis + for _ in 1:num_axis + ] + prob_upper = [ + (rand(rng, N, num_states_per_axis, num_choices) .+ N(1)) ./ + num_states_per_axis for _ in 1:num_axis + ] + + ambiguity_sets = ntuple( + i -> IntervalAmbiguitySets(; + lower = prob_lower[i], + upper = prob_upper[i], + ), + num_axis, + ) + + marginals = ntuple( + i -> Marginal(ambiguity_sets[i], state_indices, action_indices, state_vars, action_vars), + num_axis, + ) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, marginals) + + prop = FiniteTimeReachability( + [(num_states_per_axis, num_states_per_axis, num_states_per_axis)], + 10, + ) + spec = Specification(prop, Pessimistic, Maximize) + prob = ControlSynthesisProblem(mdp, spec) + + policy, V, it, res = solve(prob) + @test it == 10 + @test all(V .≥ 0.0) + + # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP + prob = VerificationProblem(mdp, spec, policy) + V_mc, k, res = solve(prob) + @test V ≈ V_mc + end + end +end + +# @testset "Orthogonal abstraction" begin +# using LazySets +# using SpecialFunctions: erf +# using LinearAlgebra: I + +# function IMDP_orthogonal_abstraction() +# A = 0.9 * I +# B = 0.7 * I +# sigma = 2.0 + +# X = Hyperrectangle(; low = [-10.0, -10.0], high = [10.0, 10.0]) +# X1 = Interval(-10.0, 10.0) +# X2 = Interval(-10.0, 10.0) +# U = Hyperrectangle(; low = [-1.0, -1.0], high = [1.0, 1.0]) + +# reach_region = 
Hyperrectangle(; low = [4.0, -6.0], high = [10.0, -2.0]) + +# l = [5, 5] +# X1_split = split(X1, l[1]) +# X2_split = split(X2, l[2]) + +# X_split = Matrix{LazySet}(undef, l[1], l[2]) +# for j in 1:l[2] +# for i in 1:l[1] +# x1 = X1_split[i] +# x2 = X2_split[j] +# X_split[i, j] = Hyperrectangle( +# [center(x1)[1], center(x2)[1]], +# [radius_hyperrectangle(x1)[1], radius_hyperrectangle(x2)[1]], +# ) +# end +# end + +# U_split = split(U, [3, 3]) + +# transition_prob(x, v_lower, v_upper) = +# 0.5 * +# erf((x - v_upper) / (sigma * sqrt(2.0)), (x - v_lower) / (sigma * sqrt(2.0))) + +# probs1 = IntervalProbabilities{Float64, Vector{Float64}, Matrix{Float64}}[] +# probs2 = IntervalProbabilities{Float64, Vector{Float64}, Matrix{Float64}}[] +# stateptr = Int32[1] + +# for source2 in 1:(l[2] + 1) +# for source1 in 1:(l[1] + 1) +# if source1 == 1 || source2 == 1 +# probs1_lower = zeros(l[1] + 1, 1) +# probs1_upper = zeros(l[1] + 1, 1) + +# probs1_upper[source1, 1] = 1 +# probs1_lower[source1, 1] = 1 + +# probs2_lower = zeros(l[2] + 1, 1) +# probs2_upper = zeros(l[2] + 1, 1) + +# probs2_upper[source2, 1] = 1 +# probs2_lower[source2, 1] = 1 + +# push!( +# probs1, +# IntervalProbabilities(; lower = probs1_lower, upper = probs1_upper), +# ) +# push!( +# probs2, +# IntervalProbabilities(; lower = probs2_lower, upper = probs2_upper), +# ) +# else +# Xij = X_split[source1 - 1, source2 - 1] + +# for u in U_split +# Xij_u = A * Xij + B * u +# Xij_u = box_approximation(Xij_u) + +# probs1_lower = zeros(l[1] + 1, 1) +# probs1_upper = zeros(l[1] + 1, 1) + +# for target1 in 1:(l[1] + 1) +# if target1 == 1 +# probs1_upper[target1, 1] = +# max( +# 1 - transition_prob( +# low(Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# high(Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# ) + eps(Float64) +# probs1_lower[target1, 1] = min( +# 1 - transition_prob( +# center(Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# low(Xij_u)[1], +# low(X)[1], +# 
high(X)[1], +# ), +# 1 - transition_prob( +# high(Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# ) +# else +# probs1_upper[target1, 1] = max( +# transition_prob( +# center(Xij_u)[1], +# low(X1_split[target1 - 1])[1], +# high(X1_split[target1 - 1])[1], +# ), +# transition_prob( +# low(Xij_u)[1], +# low(X1_split[target1 - 1])[1], +# high(X1_split[target1 - 1])[1], +# ), +# transition_prob( +# high(Xij_u)[1], +# low(X1_split[target1 - 1])[1], +# high(X1_split[target1 - 1])[1], +# ), +# ) +# probs1_lower[target1, 1] = min( +# transition_prob( +# low(Xij_u)[1], +# low(X1_split[target1 - 1])[1], +# high(X1_split[target1 - 1])[1], +# ), +# transition_prob( +# high(Xij_u)[1], +# low(X1_split[target1 - 1])[1], +# high(X1_split[target1 - 1])[1], +# ), +# ) +# end +# end + +# probs2_lower = zeros(l[2] + 1, 1) +# probs2_upper = zeros(l[2] + 1, 1) + +# for target2 in 1:(l[2] + 1) +# if target2 == 1 +# probs2_upper[target2, 1] = +# max( +# 1 - transition_prob( +# low(Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# 1 - transition_prob( +# high(Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# ) + eps(Float64) +# probs2_lower[target2, 1] = min( +# 1 - transition_prob( +# center(Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# 1 - transition_prob( +# low(Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# 1 - transition_prob( +# high(Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# ) +# else +# probs2_upper[target2, 1] = max( +# transition_prob( +# center(Xij_u)[2], +# low(X2_split[target2 - 1])[1], +# high(X2_split[target2 - 1])[1], +# ), +# transition_prob( +# low(Xij_u)[2], +# low(X2_split[target2 - 1])[1], +# high(X2_split[target2 - 1])[1], +# ), +# transition_prob( +# high(Xij_u)[2], +# low(X2_split[target2 - 1])[1], +# high(X2_split[target2 - 1])[1], +# ), +# ) +# probs2_lower[target2, 1] = min( +# transition_prob( +# low(Xij_u)[2], +# low(X2_split[target2 - 1])[1], +# high(X2_split[target2 - 1])[1], +# ), +# transition_prob( +# high(Xij_u)[2], +# low(X2_split[target2 - 
1])[1], +# high(X2_split[target2 - 1])[1], +# ), +# ) +# end +# end + +# push!( +# probs1, +# IntervalProbabilities(; +# lower = probs1_lower, +# upper = probs1_upper, +# ), +# ) +# push!( +# probs2, +# IntervalProbabilities(; +# lower = probs2_lower, +# upper = probs2_upper, +# ), +# ) +# end +# end + +# push!(stateptr, length(probs1) + 1) +# end +# end + +# probs1, _ = IntervalMDP.interval_prob_hcat(probs1) +# probs2, _ = IntervalMDP.interval_prob_hcat(probs2) +# probs = OrthogonalIntervalProbabilities( +# (probs1, probs2), +# (Int32(l[1] + 1), Int32(l[2] + 1)), +# ) +# pmdp = OrthogonalIntervalMarkovDecisionProcess(probs, stateptr) + +# reach = Tuple{Int32, Int32}[] +# avoid = Tuple{Int32, Int32}[] + +# for j in 1:(l[2] + 1) +# for i in 1:(l[1] + 1) +# if j == 1 || i == 1 +# push!(avoid, (i, j)) +# elseif X_split[i - 1, j - 1] ⊆ reach_region +# push!(reach, (i, j)) +# end +# end +# end + +# return pmdp, reach, avoid +# end + +# function IMDP_direct_abstraction() +# A = 0.9I(2) +# B = 0.7I(2) +# sigma = 2.0 + +# X = Hyperrectangle(; low = [-10.0, -10.0], high = [10.0, 10.0]) +# X1 = Interval(-10.0, 10.0) +# X2 = Interval(-10.0, 10.0) +# U = Hyperrectangle(; low = [-1.0, -1.0], high = [1.0, 1.0]) + +# reach_region = Hyperrectangle(; low = [4.0, -6.0], high = [10.0, -2.0]) + +# l = [5, 5] +# X1_split = split(X1, l[1]) +# X2_split = split(X2, l[2]) + +# X_split = Matrix{LazySet}(undef, l[1] + 1, l[2] + 1) +# for j in 1:(l[2] + 1) +# for i in 1:(l[1] + 1) +# if i == 1 && j == 1 +# X_split[i, j] = CartesianProduct( +# Complement(Interval(low(X, 1), high(X, 1))), +# Complement(Interval(low(X, 2), high(X, 2))), +# ) +# elseif i == 1 +# x2 = X2_split[j - 1] +# X_split[i, j] = CartesianProduct( +# Complement(Interval(low(X, 1), high(X, 1))), +# Interval(low(x2, 1), high(x2, 1)), +# ) +# elseif j == 1 +# x1 = X1_split[i - 1] +# X_split[i, j] = CartesianProduct( +# Interval(low(x1, 1), high(x1, 1)), +# Complement(Interval(low(X, 2), high(X, 2))), +# ) +# else +# x1 = 
X1_split[i - 1] +# x2 = X2_split[j - 1] +# X_split[i, j] = Hyperrectangle( +# [center(x1)[1], center(x2)[1]], +# [radius_hyperrectangle(x1)[1], radius_hyperrectangle(x2)[1]], +# ) +# end +# end +# end + +# U_split = split(U, [3, 3]) + +# transition_prob(x, v_lower, v_upper) = +# 0.5 * +# erf((x - v_upper) / (sigma * sqrt(2.0)), (x - v_lower) / (sigma * sqrt(2.0))) + +# probs = IntervalProbabilities{Float64, Vector{Float64}, Matrix{Float64}}[] +# for source2 in 1:(l[2] + 1) +# for source1 in 1:(l[1] + 1) +# source = (source2 - 1) * (l[1] + 1) + source1 + +# probs_lower = Vector{Float64}[] +# probs_upper = Vector{Float64}[] + +# if source1 == 1 || source2 == 1 +# prob_upper = zeros(prod(l .+ 1)) +# prob_lower = zeros(prod(l .+ 1)) + +# prob_upper[source] = 1 +# prob_lower[source] = 1 + +# push!(probs_lower, prob_lower) +# push!(probs_upper, prob_upper) +# else +# Xij = X_split[source1, source2] + +# for u in U_split +# Xij_u = A * Xij + B * u +# box_Xij_u = box_approximation(Xij_u) + +# prob_upper = zeros(prod(l .+ 1)) +# prob_lower = zeros(prod(l .+ 1)) + +# for target2 in 1:(l[2] + 1) +# for target1 in 1:(l[1] + 1) +# Xij_target = X_split[target1, target2] +# target = (target2 - 1) * (l[1] + 1) + target1 + +# if target1 == 1 && target2 == 1 +# prob_upper[target] = +# max( +# 1 - transition_prob( +# low(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# ) * max( +# 1 - transition_prob( +# low(box_Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# ) +# prob_lower[target] = +# min( +# 1 - transition_prob( +# center(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# low(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# ) * min( +# 1 - transition_prob( +# center(box_Xij_u)[2], +# low(X)[2], +# 
high(X)[2], +# ), +# 1 - transition_prob( +# low(box_Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# ) +# elseif target1 == 1 +# prob_upper[target] = +# max( +# 1 - transition_prob( +# low(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# ) * max( +# transition_prob( +# center(box_Xij_u)[2], +# low(Xij_target.Y)[1], +# high(Xij_target.Y)[1], +# ), +# transition_prob( +# low(box_Xij_u)[2], +# low(Xij_target.Y)[1], +# high(Xij_target.Y)[1], +# ), +# transition_prob( +# high(box_Xij_u)[2], +# low(Xij_target.Y)[1], +# high(Xij_target.Y)[1], +# ), +# ) +# prob_lower[target] = +# min( +# 1 - transition_prob( +# center(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# low(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[1], +# low(X)[1], +# high(X)[1], +# ), +# ) * min( +# transition_prob( +# low(box_Xij_u)[2], +# low(Xij_target.Y)[1], +# high(Xij_target.Y)[1], +# ), +# transition_prob( +# high(box_Xij_u)[2], +# low(Xij_target.Y)[1], +# high(Xij_target.Y)[1], +# ), +# ) +# elseif target2 == 1 +# prob_upper[target] = +# max( +# transition_prob( +# center(box_Xij_u)[1], +# low(Xij_target.X)[1], +# high(Xij_target.X)[1], +# ), +# transition_prob( +# low(box_Xij_u)[1], +# low(Xij_target.X)[1], +# high(Xij_target.X)[1], +# ), +# transition_prob( +# high(box_Xij_u)[1], +# low(Xij_target.X)[1], +# high(Xij_target.X)[1], +# ), +# ) * max( +# 1 - transition_prob( +# low(box_Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[2], +# low(X)[2], +# high(X)[2], +# ), +# ) +# prob_lower[target] = +# min( +# transition_prob( +# low(box_Xij_u)[1], +# low(Xij_target.X)[1], +# high(Xij_target.X)[1], +# ), +# transition_prob( +# high(box_Xij_u)[1], +# low(Xij_target.X)[1], +# high(Xij_target.X)[1], +# ), +# ) * min( +# 
1 - transition_prob( +# center(box_Xij_u)[2], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# low(box_Xij_u)[2], +# low(X)[1], +# high(X)[1], +# ), +# 1 - transition_prob( +# high(box_Xij_u)[2], +# low(X)[1], +# high(X)[1], +# ), +# ) +# else +# prob_upper[target] = +# max( +# transition_prob( +# center(box_Xij_u)[1], +# low(Xij_target)[1], +# high(Xij_target)[1], +# ), +# transition_prob( +# low(box_Xij_u)[1], +# low(Xij_target)[1], +# high(Xij_target)[1], +# ), +# transition_prob( +# high(box_Xij_u)[1], +# low(Xij_target)[1], +# high(Xij_target)[1], +# ), +# ) * max( +# transition_prob( +# center(box_Xij_u)[2], +# low(Xij_target)[2], +# high(Xij_target)[2], +# ), +# transition_prob( +# low(box_Xij_u)[2], +# low(Xij_target)[2], +# high(Xij_target)[2], +# ), +# transition_prob( +# high(box_Xij_u)[2], +# low(Xij_target)[2], +# high(Xij_target)[2], +# ), +# ) +# prob_lower[target] = +# min( +# transition_prob( +# low(box_Xij_u)[1], +# low(Xij_target)[1], +# high(Xij_target)[1], +# ), +# transition_prob( +# high(box_Xij_u)[1], +# low(Xij_target)[1], +# high(Xij_target)[1], +# ), +# ) * min( +# transition_prob( +# low(box_Xij_u)[2], +# low(Xij_target)[2], +# high(Xij_target)[2], +# ), +# transition_prob( +# high(box_Xij_u)[2], +# low(Xij_target)[2], +# high(Xij_target)[2], +# ), +# ) +# end +# end +# end + +# push!(probs_lower, prob_lower) +# push!(probs_upper, prob_upper) +# end +# end + +# prob = IntervalProbabilities(; +# lower = reduce(hcat, probs_lower), +# upper = reduce(hcat, probs_upper), +# ) +# push!(probs, prob) +# end +# end +# mdp = IntervalMarkovDecisionProcess(probs) + +# reach = Int32[] +# avoid = Int32[] + +# for source2 in 1:(l[2] + 1) +# for source1 in 1:(l[1] + 1) +# Xij = X_split[source1, source2] +# source = (source2 - 1) * (l[1] + 1) + source1 + +# if source1 == 1 || source2 == 1 +# push!(avoid, source) +# elseif Xij ⊆ reach_region +# push!(reach, source) +# end +# end +# end + +# return mdp, reach, avoid +# end + +# # Orthogonal 
abstraction +# pmdp, reach_set, avoid_set = IMDP_orthogonal_abstraction() + +# prop = FiniteTimeReachAvoid(reach_set, avoid_set, 10) +# spec = Specification(prop, Pessimistic, Maximize) +# prob_ortho = VerificationProblem(pmdp, spec) + +# V_ortho, it_ortho, res_ortho = solve(prob_ortho) +# @test all(V_ortho .≥ 0.0) + +# # Direct abstraction +# mdp, reach_set, avoid_set = IMDP_direct_abstraction() + +# prop = FiniteTimeReachAvoid(reach_set, avoid_set, 10) +# spec = Specification(prop, Pessimistic, Maximize) +# prob_direct = VerificationProblem(mdp, spec) + +# V_direct, it_direct, res_direct = solve(prob_direct) + +# @test it_ortho == it_direct +# @test all(V_ortho .≥ reshape(V_direct, 6, 6)) +# end diff --git a/test/base/orthogonal.jl b/test/base/orthogonal.jl deleted file mode 100644 index e69c79ca..00000000 --- a/test/base/orthogonal.jl +++ /dev/null @@ -1,1369 +0,0 @@ -using Revise, Test -using IntervalMDP -using Random: MersenneTwister - -for N in [Float32, Float64, Rational{BigInt}] - @testset "N = $N" begin - @testset "bellman 1d" begin - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; - lower = N[0 5//10; 1//10 3//10; 2//10 1//10], - upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], - ), - ), - (Int32(2),), - ) - - V = N[1, 2, 3] - - @testset "maximization" begin - Vexpected = N[27 // 10, 17 // 10] - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.DenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.ThreadedDenseOrthogonalWorkspace(prob, 1) - strategy_cache = 
IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vexpected - end - - @testset "minimization" begin - Vexpected = N[17 // 10, 15 // 10] - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.DenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.ThreadedDenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vexpected - end - end - - @testset "bellman 3d" begin - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 - 3//10 4//15 
1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 - 3//5 2//3 1//2 1//2 
2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(3), Int32(3)), - ) - - V = N[ - 23, - 27, - 16, - 6, - 26, - 17, - 12, - 9, - 8, - 22, - 1, - 21, - 11, - 24, - 4, - 10, - 13, - 19, - 3, - 14, - 25, - 20, - 18, - 7, - 5, - 15, - 2, - ] - V = reshape(V, 3, 3, 3) - - #### Maximization - @testset "maximization" begin - V111_expected = 17.276 - V222_expected = 18.838037037037 - V333_expected = 17.3777407407407 - V131_expected = 18.8653703703704 - V122_expected = 20.52 - V113_expected = 19.5096296296296 - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres_first, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test all(Vres_first .>= 0.0) - @test Vres_first[1, 1, 1] ≈ V111_expected - @test Vres_first[2, 2, 2] ≈ V222_expected - @test Vres_first[3, 3, 3] ≈ V333_expected - @test Vres_first[1, 3, 1] ≈ V131_expected - @test Vres_first[1, 2, 2] ≈ V122_expected - @test Vres_first[1, 1, 3] ≈ V113_expected - - ws = IntervalMDP.DenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres_first) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vres_first - - ws = IntervalMDP.ThreadedDenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres_first) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vres_first - 
end - - #### Minimization - @testset "minimization" begin - V111_expected = 10.1567407407407 - V222_expected = 10.4691111111111 - V333_expected = 11.4640740740741 - V131_expected = 7.8724444444445 - V122_expected = 10.3088888888889 - V113_expected = 11.5774074074074 - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres_first, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test all(Vres_first .>= 0.0) - @test Vres_first[1, 1, 1] ≈ V111_expected - @test Vres_first[2, 2, 2] ≈ V222_expected - @test Vres_first[3, 3, 3] ≈ V333_expected - @test Vres_first[1, 3, 1] ≈ V131_expected - @test Vres_first[1, 2, 2] ≈ V122_expected - @test Vres_first[1, 1, 3] ≈ V113_expected - - ws = IntervalMDP.DenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres_first) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vres_first - - ws = IntervalMDP.ThreadedDenseOrthogonalWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres_first) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vres_first - end - end - - @testset "implicit sink state" begin - @testset "first dimension" begin - lower1 = N[ - 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 - 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 - 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 - ] - lower2 = N[ - 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 
1 3//10 4//15 0 2//15 2//15 0 - 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 - 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 - ] - lower3 = N[ - 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 - 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 - 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 - ] - - upper1 = N[ - 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 - 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 - 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 - ] - upper2 = N[ - 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 - 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 - 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 - ] - upper3 = N[ - 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 - 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 - 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper 
= upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(3), Int32(3)), - ) - - num_states = 27 - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states))] - mdp = OrthogonalIntervalMarkovDecisionProcess(prob, stateptr) - - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 
11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(2), Int32(3), Int32(3)), - ) - - num_states = 18 - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states))] - implicit_mdp = OrthogonalIntervalMarkovDecisionProcess(prob, stateptr) - - prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end - - @testset "second dimension" begin - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 
3//10 1//3 2//15 1//15 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(3), Int32(3)), - ) - - num_states = 27 - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states))] - mdp = OrthogonalIntervalMarkovDecisionProcess(prob, stateptr) - - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 
1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = 
upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(2), Int32(3)), - ) - - num_states = 18 - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states))] - implicit_mdp = OrthogonalIntervalMarkovDecisionProcess(prob, stateptr) - - prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end - - @testset "last dimension" begin - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 
1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(3), Int32(3)), - ) - - num_states = 27 - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states))] - mdp = OrthogonalIntervalMarkovDecisionProcess(prob, stateptr) - - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 
1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(3), Int32(2)), - ) - - num_states = 18 - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states))] - implicit_mdp = OrthogonalIntervalMarkovDecisionProcess(prob, stateptr) - - prop = FiniteTimeSafety([(i, j, 3) for i in 
1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end - end - end -end - -for N in [Float32, Float64] - @testset "N = $N" begin - # 3-D abstraction - @testset "3D abstraction" begin - rng = MersenneTwister(995) - - prob_lower = [rand(rng, N, 3, 27) ./ 3.0 for _ in 1:3] - prob_upper = [(rand(rng, N, 3, 27) .+ N(1)) ./ 3.0 for _ in 1:3] - - probs = OrthogonalIntervalProbabilities( - ntuple( - i -> IntervalProbabilities(; - lower = prob_lower[i], - upper = prob_upper[i], - ), - 3, - ), - (Int32(3), Int32(3), Int32(3)), - ) - - mdp = OrthogonalIntervalMarkovChain(probs) - - prop = FiniteTimeReachability([(3, 3, 3)], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - - V_ortho, it_ortho, res_ortho = solve(prob) - - @test V_ortho[3, 3, 3] ≈ 1.0 - @test all(V_ortho .>= 0.0) - - # Test against the naive construction - prob_lower_simple = zeros(N, 27, 27) - prob_upper_simple = zeros(N, 27, 27) - - for i in 1:27 - for (j₁, j₂, j₃) in Iterators.product(1:3, 1:3, 1:3) - j = (j₃ - 1) * 9 + (j₂ - 1) * 3 + j₁ - - prob_lower_simple[j, i] = - prob_lower[1][j₁, i] * prob_lower[2][j₂, i] * prob_lower[3][j₃, i] - prob_upper_simple[j, i] = - prob_upper[1][j₁, i] * prob_upper[2][j₂, i] * prob_upper[3][j₃, i] - end - end - - probs = IntervalProbabilities(; - lower = prob_lower_simple, - upper = prob_upper_simple, - ) - - mdp = IntervalMarkovChain(probs) - - prop = FiniteTimeReachability([27], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - - V_direct, it_direct, res_direct = solve(prob) - @test V_direct[27] ≈ N(1) - @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3)) - end - - @testset "synthesis" begin - rng 
= MersenneTwister(3286) - - num_states_per_axis = 3 - num_axis = 3 - num_states = num_states_per_axis^num_axis - num_actions = 2 - num_choices = num_states * num_actions - - prob_lower = [ - rand(rng, N, num_states_per_axis, num_choices) ./ num_states_per_axis - for _ in 1:num_axis - ] - prob_upper = [ - (rand(rng, N, num_states_per_axis, num_choices) .+ N(1)) ./ - num_states_per_axis for _ in 1:num_axis - ] - - probs = OrthogonalIntervalProbabilities( - ntuple( - i -> IntervalProbabilities(; - lower = prob_lower[i], - upper = prob_upper[i], - ), - num_axis, - ), - ( - Int32(num_states_per_axis), - Int32(num_states_per_axis), - Int32(num_states_per_axis), - ), - ) - - stateptr = [Int32[1]; convert.(Int32, 1 .+ collect(1:num_states) .* 2)] - mdp = OrthogonalIntervalMarkovDecisionProcess(probs, stateptr) - - prop = FiniteTimeReachability( - [(num_states_per_axis, num_states_per_axis, num_states_per_axis)], - 10, - ) - spec = Specification(prop, Pessimistic, Maximize) - prob = ControlSynthesisProblem(mdp, spec) - - policy, V, it, res = solve(prob) - @test it == 10 - @test all(V .≥ 0.0) - - # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP - prob = VerificationProblem(mdp, spec, policy) - V_mc, k, res = solve(prob) - @test V ≈ V_mc - end - end -end - -@testset "Orthogonal abstraction" begin - using LazySets - using SpecialFunctions: erf - using LinearAlgebra: I - - function IMDP_orthogonal_abstraction() - A = 0.9 * I - B = 0.7 * I - sigma = 2.0 - - X = Hyperrectangle(; low = [-10.0, -10.0], high = [10.0, 10.0]) - X1 = Interval(-10.0, 10.0) - X2 = Interval(-10.0, 10.0) - U = Hyperrectangle(; low = [-1.0, -1.0], high = [1.0, 1.0]) - - reach_region = Hyperrectangle(; low = [4.0, -6.0], high = [10.0, -2.0]) - - l = [5, 5] - X1_split = split(X1, l[1]) - X2_split = split(X2, l[2]) - - X_split = Matrix{LazySet}(undef, l[1], l[2]) - for j in 1:l[2] - for i in 1:l[1] - x1 = X1_split[i] - x2 = X2_split[j] 
- X_split[i, j] = Hyperrectangle( - [center(x1)[1], center(x2)[1]], - [radius_hyperrectangle(x1)[1], radius_hyperrectangle(x2)[1]], - ) - end - end - - U_split = split(U, [3, 3]) - - transition_prob(x, v_lower, v_upper) = - 0.5 * - erf((x - v_upper) / (sigma * sqrt(2.0)), (x - v_lower) / (sigma * sqrt(2.0))) - - probs1 = IntervalProbabilities{Float64, Vector{Float64}, Matrix{Float64}}[] - probs2 = IntervalProbabilities{Float64, Vector{Float64}, Matrix{Float64}}[] - stateptr = Int32[1] - - for source2 in 1:(l[2] + 1) - for source1 in 1:(l[1] + 1) - if source1 == 1 || source2 == 1 - probs1_lower = zeros(l[1] + 1, 1) - probs1_upper = zeros(l[1] + 1, 1) - - probs1_upper[source1, 1] = 1 - probs1_lower[source1, 1] = 1 - - probs2_lower = zeros(l[2] + 1, 1) - probs2_upper = zeros(l[2] + 1, 1) - - probs2_upper[source2, 1] = 1 - probs2_lower[source2, 1] = 1 - - push!( - probs1, - IntervalProbabilities(; lower = probs1_lower, upper = probs1_upper), - ) - push!( - probs2, - IntervalProbabilities(; lower = probs2_lower, upper = probs2_upper), - ) - else - Xij = X_split[source1 - 1, source2 - 1] - - for u in U_split - Xij_u = A * Xij + B * u - Xij_u = box_approximation(Xij_u) - - probs1_lower = zeros(l[1] + 1, 1) - probs1_upper = zeros(l[1] + 1, 1) - - for target1 in 1:(l[1] + 1) - if target1 == 1 - probs1_upper[target1, 1] = - max( - 1 - transition_prob( - low(Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(Xij_u)[1], - low(X)[1], - high(X)[1], - ), - ) + eps(Float64) - probs1_lower[target1, 1] = min( - 1 - transition_prob( - center(Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - low(Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(Xij_u)[1], - low(X)[1], - high(X)[1], - ), - ) - else - probs1_upper[target1, 1] = max( - transition_prob( - center(Xij_u)[1], - low(X1_split[target1 - 1])[1], - high(X1_split[target1 - 1])[1], - ), - transition_prob( - low(Xij_u)[1], - low(X1_split[target1 - 1])[1], - 
high(X1_split[target1 - 1])[1], - ), - transition_prob( - high(Xij_u)[1], - low(X1_split[target1 - 1])[1], - high(X1_split[target1 - 1])[1], - ), - ) - probs1_lower[target1, 1] = min( - transition_prob( - low(Xij_u)[1], - low(X1_split[target1 - 1])[1], - high(X1_split[target1 - 1])[1], - ), - transition_prob( - high(Xij_u)[1], - low(X1_split[target1 - 1])[1], - high(X1_split[target1 - 1])[1], - ), - ) - end - end - - probs2_lower = zeros(l[2] + 1, 1) - probs2_upper = zeros(l[2] + 1, 1) - - for target2 in 1:(l[2] + 1) - if target2 == 1 - probs2_upper[target2, 1] = - max( - 1 - transition_prob( - low(Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - high(Xij_u)[2], - low(X)[2], - high(X)[2], - ), - ) + eps(Float64) - probs2_lower[target2, 1] = min( - 1 - transition_prob( - center(Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - low(Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - high(Xij_u)[2], - low(X)[2], - high(X)[2], - ), - ) - else - probs2_upper[target2, 1] = max( - transition_prob( - center(Xij_u)[2], - low(X2_split[target2 - 1])[1], - high(X2_split[target2 - 1])[1], - ), - transition_prob( - low(Xij_u)[2], - low(X2_split[target2 - 1])[1], - high(X2_split[target2 - 1])[1], - ), - transition_prob( - high(Xij_u)[2], - low(X2_split[target2 - 1])[1], - high(X2_split[target2 - 1])[1], - ), - ) - probs2_lower[target2, 1] = min( - transition_prob( - low(Xij_u)[2], - low(X2_split[target2 - 1])[1], - high(X2_split[target2 - 1])[1], - ), - transition_prob( - high(Xij_u)[2], - low(X2_split[target2 - 1])[1], - high(X2_split[target2 - 1])[1], - ), - ) - end - end - - push!( - probs1, - IntervalProbabilities(; - lower = probs1_lower, - upper = probs1_upper, - ), - ) - push!( - probs2, - IntervalProbabilities(; - lower = probs2_lower, - upper = probs2_upper, - ), - ) - end - end - - push!(stateptr, length(probs1) + 1) - end - end - - probs1, _ = IntervalMDP.interval_prob_hcat(probs1) - probs2, _ = 
IntervalMDP.interval_prob_hcat(probs2) - probs = OrthogonalIntervalProbabilities( - (probs1, probs2), - (Int32(l[1] + 1), Int32(l[2] + 1)), - ) - pmdp = OrthogonalIntervalMarkovDecisionProcess(probs, stateptr) - - reach = Tuple{Int32, Int32}[] - avoid = Tuple{Int32, Int32}[] - - for j in 1:(l[2] + 1) - for i in 1:(l[1] + 1) - if j == 1 || i == 1 - push!(avoid, (i, j)) - elseif X_split[i - 1, j - 1] ⊆ reach_region - push!(reach, (i, j)) - end - end - end - - return pmdp, reach, avoid - end - - function IMDP_direct_abstraction() - A = 0.9I(2) - B = 0.7I(2) - sigma = 2.0 - - X = Hyperrectangle(; low = [-10.0, -10.0], high = [10.0, 10.0]) - X1 = Interval(-10.0, 10.0) - X2 = Interval(-10.0, 10.0) - U = Hyperrectangle(; low = [-1.0, -1.0], high = [1.0, 1.0]) - - reach_region = Hyperrectangle(; low = [4.0, -6.0], high = [10.0, -2.0]) - - l = [5, 5] - X1_split = split(X1, l[1]) - X2_split = split(X2, l[2]) - - X_split = Matrix{LazySet}(undef, l[1] + 1, l[2] + 1) - for j in 1:(l[2] + 1) - for i in 1:(l[1] + 1) - if i == 1 && j == 1 - X_split[i, j] = CartesianProduct( - Complement(Interval(low(X, 1), high(X, 1))), - Complement(Interval(low(X, 2), high(X, 2))), - ) - elseif i == 1 - x2 = X2_split[j - 1] - X_split[i, j] = CartesianProduct( - Complement(Interval(low(X, 1), high(X, 1))), - Interval(low(x2, 1), high(x2, 1)), - ) - elseif j == 1 - x1 = X1_split[i - 1] - X_split[i, j] = CartesianProduct( - Interval(low(x1, 1), high(x1, 1)), - Complement(Interval(low(X, 2), high(X, 2))), - ) - else - x1 = X1_split[i - 1] - x2 = X2_split[j - 1] - X_split[i, j] = Hyperrectangle( - [center(x1)[1], center(x2)[1]], - [radius_hyperrectangle(x1)[1], radius_hyperrectangle(x2)[1]], - ) - end - end - end - - U_split = split(U, [3, 3]) - - transition_prob(x, v_lower, v_upper) = - 0.5 * - erf((x - v_upper) / (sigma * sqrt(2.0)), (x - v_lower) / (sigma * sqrt(2.0))) - - probs = IntervalProbabilities{Float64, Vector{Float64}, Matrix{Float64}}[] - for source2 in 1:(l[2] + 1) - for source1 in 
1:(l[1] + 1) - source = (source2 - 1) * (l[1] + 1) + source1 - - probs_lower = Vector{Float64}[] - probs_upper = Vector{Float64}[] - - if source1 == 1 || source2 == 1 - prob_upper = zeros(prod(l .+ 1)) - prob_lower = zeros(prod(l .+ 1)) - - prob_upper[source] = 1 - prob_lower[source] = 1 - - push!(probs_lower, prob_lower) - push!(probs_upper, prob_upper) - else - Xij = X_split[source1, source2] - - for u in U_split - Xij_u = A * Xij + B * u - box_Xij_u = box_approximation(Xij_u) - - prob_upper = zeros(prod(l .+ 1)) - prob_lower = zeros(prod(l .+ 1)) - - for target2 in 1:(l[2] + 1) - for target1 in 1:(l[1] + 1) - Xij_target = X_split[target1, target2] - target = (target2 - 1) * (l[1] + 1) + target1 - - if target1 == 1 && target2 == 1 - prob_upper[target] = - max( - 1 - transition_prob( - low(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - ) * max( - 1 - transition_prob( - low(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - high(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - ) - prob_lower[target] = - min( - 1 - transition_prob( - center(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - low(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - ) * min( - 1 - transition_prob( - center(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - low(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - high(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - ) - elseif target1 == 1 - prob_upper[target] = - max( - 1 - transition_prob( - low(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - ) * max( - transition_prob( - center(box_Xij_u)[2], - low(Xij_target.Y)[1], - high(Xij_target.Y)[1], - ), - transition_prob( - low(box_Xij_u)[2], - low(Xij_target.Y)[1], - 
high(Xij_target.Y)[1], - ), - transition_prob( - high(box_Xij_u)[2], - low(Xij_target.Y)[1], - high(Xij_target.Y)[1], - ), - ) - prob_lower[target] = - min( - 1 - transition_prob( - center(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - low(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(box_Xij_u)[1], - low(X)[1], - high(X)[1], - ), - ) * min( - transition_prob( - low(box_Xij_u)[2], - low(Xij_target.Y)[1], - high(Xij_target.Y)[1], - ), - transition_prob( - high(box_Xij_u)[2], - low(Xij_target.Y)[1], - high(Xij_target.Y)[1], - ), - ) - elseif target2 == 1 - prob_upper[target] = - max( - transition_prob( - center(box_Xij_u)[1], - low(Xij_target.X)[1], - high(Xij_target.X)[1], - ), - transition_prob( - low(box_Xij_u)[1], - low(Xij_target.X)[1], - high(Xij_target.X)[1], - ), - transition_prob( - high(box_Xij_u)[1], - low(Xij_target.X)[1], - high(Xij_target.X)[1], - ), - ) * max( - 1 - transition_prob( - low(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - 1 - transition_prob( - high(box_Xij_u)[2], - low(X)[2], - high(X)[2], - ), - ) - prob_lower[target] = - min( - transition_prob( - low(box_Xij_u)[1], - low(Xij_target.X)[1], - high(Xij_target.X)[1], - ), - transition_prob( - high(box_Xij_u)[1], - low(Xij_target.X)[1], - high(Xij_target.X)[1], - ), - ) * min( - 1 - transition_prob( - center(box_Xij_u)[2], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - low(box_Xij_u)[2], - low(X)[1], - high(X)[1], - ), - 1 - transition_prob( - high(box_Xij_u)[2], - low(X)[1], - high(X)[1], - ), - ) - else - prob_upper[target] = - max( - transition_prob( - center(box_Xij_u)[1], - low(Xij_target)[1], - high(Xij_target)[1], - ), - transition_prob( - low(box_Xij_u)[1], - low(Xij_target)[1], - high(Xij_target)[1], - ), - transition_prob( - high(box_Xij_u)[1], - low(Xij_target)[1], - high(Xij_target)[1], - ), - ) * max( - transition_prob( - center(box_Xij_u)[2], - low(Xij_target)[2], - high(Xij_target)[2], - ), - transition_prob( 
- low(box_Xij_u)[2], - low(Xij_target)[2], - high(Xij_target)[2], - ), - transition_prob( - high(box_Xij_u)[2], - low(Xij_target)[2], - high(Xij_target)[2], - ), - ) - prob_lower[target] = - min( - transition_prob( - low(box_Xij_u)[1], - low(Xij_target)[1], - high(Xij_target)[1], - ), - transition_prob( - high(box_Xij_u)[1], - low(Xij_target)[1], - high(Xij_target)[1], - ), - ) * min( - transition_prob( - low(box_Xij_u)[2], - low(Xij_target)[2], - high(Xij_target)[2], - ), - transition_prob( - high(box_Xij_u)[2], - low(Xij_target)[2], - high(Xij_target)[2], - ), - ) - end - end - end - - push!(probs_lower, prob_lower) - push!(probs_upper, prob_upper) - end - end - - prob = IntervalProbabilities(; - lower = reduce(hcat, probs_lower), - upper = reduce(hcat, probs_upper), - ) - push!(probs, prob) - end - end - mdp = IntervalMarkovDecisionProcess(probs) - - reach = Int32[] - avoid = Int32[] - - for source2 in 1:(l[2] + 1) - for source1 in 1:(l[1] + 1) - Xij = X_split[source1, source2] - source = (source2 - 1) * (l[1] + 1) + source1 - - if source1 == 1 || source2 == 1 - push!(avoid, source) - elseif Xij ⊆ reach_region - push!(reach, source) - end - end - end - - return mdp, reach, avoid - end - - # Orthogonal abstraction - pmdp, reach_set, avoid_set = IMDP_orthogonal_abstraction() - - prop = FiniteTimeReachAvoid(reach_set, avoid_set, 10) - spec = Specification(prop, Pessimistic, Maximize) - prob_ortho = VerificationProblem(pmdp, spec) - - V_ortho, it_ortho, res_ortho = solve(prob_ortho) - @test all(V_ortho .≥ 0.0) - - # Direct abstraction - mdp, reach_set, avoid_set = IMDP_direct_abstraction() - - prop = FiniteTimeReachAvoid(reach_set, avoid_set, 10) - spec = Specification(prop, Pessimistic, Maximize) - prob_direct = VerificationProblem(mdp, spec) - - V_direct, it_direct, res_direct = solve(prob_direct) - - @test it_ortho == it_direct - @test all(V_ortho .≥ reshape(V_direct, 6, 6)) -end diff --git a/test/sparse/bellman.jl b/test/sparse/bellman.jl index 
5c3a2e19..528f9468 100644 --- a/test/sparse/bellman.jl +++ b/test/sparse/bellman.jl @@ -18,7 +18,7 @@ for N in [Float32, Float64, Rational{BigInt}] #### Maximization @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob) + ws = IntervalMDP.construct_workspace(prob, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(Float64, 2) IntervalMDP._bellman_helper!( @@ -31,7 +31,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - ws = IntervalMDP.SparseIntervalWorkspace(prob, 1) + ws = IntervalMDP.SparseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -44,7 +44,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[82 // 10, 57 // 10] - ws = IntervalMDP.ThreadedSparseIntervalWorkspace(prob, 1) + ws = IntervalMDP.ThreadedSparseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -60,7 +60,7 @@ for N in [Float32, Float64, Rational{BigInt}] #### Minimization @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob) + ws = IntervalMDP.construct_workspace(prob, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(Float64, 2) IntervalMDP._bellman_helper!( @@ -73,7 +73,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] - ws = IntervalMDP.SparseIntervalWorkspace(prob, 1) + ws = IntervalMDP.SparseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( @@ -86,7 +86,7 @@ for N in [Float32, Float64, Rational{BigInt}] ) @test Vres ≈ N[37 // 10, 55 // 10] - ws = IntervalMDP.ThreadedSparseIntervalWorkspace(prob, 
1) + ws = IntervalMDP.ThreadedSparseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) IntervalMDP._bellman_helper!( From 060b3948f514328f6017d2c038e5cee1a7078d9d Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 11 Sep 2025 12:56:23 +0200 Subject: [PATCH 08/71] HolyTraits :chefskiss: --- src/Data/bmdp-tool.jl | 13 +- src/Data/intervalmdp.jl | 11 +- src/Data/prism.jl | 14 +- src/algorithms.jl | 7 +- src/cuda.jl | 2 +- .../FactoredRobustMarkovDecisionProcess.jl | 42 +- src/probabilities/Marginal.jl | 7 +- src/probabilities/probabilities.jl | 21 +- src/robust_value_iteration.jl | 2 +- src/utils.jl | 11 +- src/workspace.jl | 56 +- test/base/bellman.jl | 168 +- test/base/factored.jl | 1915 ++++++++--------- test/base/imdp.jl | 1086 +++++----- test/base/vi.jl | 238 +- test/data/bmdp_tool.jl | 14 +- test/data/intervalmdp.jl | 8 +- test/data/prism.jl | 24 +- 18 files changed, 1842 insertions(+), 1797 deletions(-) diff --git a/src/Data/bmdp-tool.jl b/src/Data/bmdp-tool.jl index 87d3b138..6f60e432 100644 --- a/src/Data/bmdp-tool.jl +++ b/src/Data/bmdp-tool.jl @@ -171,11 +171,18 @@ write_bmdp_tool_file( """ write_bmdp_tool_file(path, mdp::IMDP, terminal_states::Vector{<:CartesianIndex}) """ -function write_bmdp_tool_file( +write_bmdp_tool_file( + path, + mdp::IntervalMDP.FactoredRMDP, + terminal_states::Vector{<:CartesianIndex}, +) = _write_bmdp_tool_file(path, mdp, IntervalMDP.modeltype(mdp), terminal_states) + +function _write_bmdp_tool_file( path, - mdp::IntervalMDP.IMDP{M}, + mdp::IntervalMDP.FactoredRMDP, + ::IntervalMDP.IsIMDP, terminal_states::Vector{<:CartesianIndex}, -) where {M} +) marginal = marginals(mdp)[1] number_states = num_states(mdp) diff --git a/src/Data/intervalmdp.jl b/src/Data/intervalmdp.jl index aa65c8a1..3e2acbed 100644 --- a/src/Data/intervalmdp.jl +++ b/src/Data/intervalmdp.jl @@ -173,7 +173,10 @@ Write an `IntervalMarkovDecisionProcess` to an IntervalMDP.jl system file 
(netCD See [Data storage formats](@ref) for more information on the file format. """ -function write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.IMDP{M}; deflate_level = 5) where {M} +write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP; deflate_level = 5) = + _write_intervalmdp_jl_model(model_path, mdp, IntervalMDP.modeltype(mdp); deflate_level = deflate_level) + +function _write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP, ::IntervalMDP.IsIMDP; deflate_level) Dataset(model_path, "c") do dataset dataset.attrib["model"] = "imdp" dataset.attrib["format"] = "sparse_csc" @@ -190,9 +193,9 @@ function write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.IMDP{M}; deflat v[:] = istates marginal = marginals(mdp)[1] - ambiguity_sets = marginal.ambiguity_sets - l = ambiguity_sets.lower - g = ambiguity_sets.gap + as = ambiguity_sets(marginal) + l = as.lower + g = as.gap defDim(dataset, "lower_colptr", length(l.colptr)) v = defVar(dataset, "lower_colptr", Int32, ("lower_colptr",); deflatelevel = deflate_level) diff --git a/src/Data/prism.jl b/src/Data/prism.jl index fbdd8dce..f0fe50c5 100644 --- a/src/Data/prism.jl +++ b/src/Data/prism.jl @@ -45,15 +45,16 @@ function write_prism_file( lab_path, srew_path, pctl_path, - mdp::IntervalMDP.IMDP{M}, + mdp::IntervalMDP.FactoredRMDP, spec, -) where {M} +) write_prism_states_file(sta_path, mdp) write_prism_transitions_file(tra_path, mdp) write_prism_spec(lab_path, srew_path, pctl_path, mdp, spec) end -function write_prism_states_file(sta_path, mdp::IntervalMDP.IMDP{M}) where {M} +write_prism_states_file(sta_path, mdp::IntervalMDP.FactoredRMDP) = _write_prism_states_file(sta_path, mdp, IntervalMDP.modeltype(mdp)) +function _write_prism_states_file(sta_path, mdp::IntervalMDP.FactoredRMDP, ::IntervalMDP.NonFactored) number_states = num_states(mdp) open(sta_path, "w") do io @@ -66,10 +67,13 @@ function write_prism_states_file(sta_path, mdp::IntervalMDP.IMDP{M}) where {M} end end -function 
write_prism_transitions_file(tra_path, mdp::IntervalMDP.IMDP{M}; lb_threshold = 1e-12) where {M} +write_prism_transitions_file(tra_path, mdp::IntervalMDP.FactoredRMDP; lb_threshold = 1e-12) = + _write_prism_transitions_file(tra_path, mdp, IntervalMDP.modeltype(mdp); lb_threshold = lb_threshold) + +function _write_prism_transitions_file(tra_path, mdp::IntervalMDP.FactoredRMDP, ::IntervalMDP.IsIMDP; lb_threshold) marginal = marginals(mdp)[1] - num_transitions = nnz(marginal.ambiguity_sets.lower) # Number of non-zero entries in the lower bound matrix + num_transitions = nnz(ambiguity_sets(marginal).lower) # Number of non-zero entries in the lower bound matrix num_choices = source_shape(marginal)[1] * action_shape(marginal)[1] open(tra_path, "w") do io diff --git a/src/algorithms.jl b/src/algorithms.jl index cdb9445f..f1d3bfb0 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -4,10 +4,11 @@ Base.@kwdef struct LPMcCormickRelaxation{O} <: BellmanAlgorithm lp_optimizer::O = HiGHS.Optimizer end -default_bellman_algorithm(::IMDP) = OMaximization() -default_bellman_algorithm(::IntervalAmbiguitySets) = OMaximization() -default_bellman_algorithm(::FactoredRMDP{N, M, <:NTuple{N, <:Marginal{<:PolytopicAmbiguitySets}}}) where {N, M} = LPMcCormickRelaxation() default_bellman_algorithm(pp::ProductProcess) = default_bellman_algorithm(markov_process(pp)) +default_bellman_algorithm(mdp::FactoredRMDP) = default_bellman_algorithm(mdp, modeltype(mdp)) +default_bellman_algorithm(::FactoredRMDP, ::IsIMDP) = OMaximization() +default_bellman_algorithm(::FactoredRMDP, ::IsFIMDP) = LPMcCormickRelaxation() +default_bellman_algorithm(::IntervalAmbiguitySets) = OMaximization() abstract type ModelCheckingAlgorithm end diff --git a/src/cuda.jl b/src/cuda.jl index 4fc4c131..2ad42f4a 100644 --- a/src/cuda.jl +++ b/src/cuda.jl @@ -34,7 +34,7 @@ function checkdevice(v::AbstractArray, system::FactoredRMDP) end function checkdevice(v::AbstractArray, marginal::Marginal) - checkdevice(v, 
marginal.ambiguity_sets) + checkdevice(v, ambiguity_sets(marginal)) end function checkdevice(v::AbstractArray, p::IntervalAmbiguitySets) diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index 1bddfd90..5693dee3 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -2,7 +2,7 @@ struct FactoredRobustMarkovDecisionProcess{ N, M, - P <: NTuple{N, <:AbstractMarginal}, + P <: NTuple{N, Marginal}, VI <: InitialStates, } <: IntervalMarkovProcess state_vars::NTuple{N, Int32} # N is the number of state variables and state_vars[n] is the number of states for state variable n @@ -19,21 +19,19 @@ struct FactoredRobustMarkovDecisionProcess{ source_dims::NTuple{N, Int32}, transition::P, initial_states::VI = nothing, - ) where {N, M, P <: NTuple{N, <:AbstractMarginal}, VI <: InitialStates} + ) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} check_rmdp(state_vars, action_vars, source_dims, transition, initial_states) return new{N, M, P, VI}(state_vars, action_vars, source_dims, transition, initial_states) end end const FactoredRMDP = FactoredRobustMarkovDecisionProcess -const FactoredIMDP{N, M} = FactoredRMDP{N, M, P} where {P <: NTuple{N, <:Marginal{<:IntervalAmbiguitySets}}} -const IMDP{M} = FactoredIMDP{1, M} function FactoredRMDP( state_vars::NTuple{N, <:Integer}, action_vars::NTuple{M, <:Integer}, source_dims::NTuple{N, <:Integer}, - transition::NTuple{N, <:AbstractMarginal}, + transition::NTuple{N, Marginal}, initial_states::VI = AllStates(), ) where {N, M, VI <: InitialStates} state_vars_32 = Int32.(state_vars) @@ -46,7 +44,7 @@ end function FactoredRMDP( state_vars::NTuple{N, <:Integer}, action_vars::NTuple{M, <:Integer}, - transition::NTuple{N, <:AbstractMarginal}, + transition::NTuple{N, Marginal}, initial_states::VI = AllStates(), ) where {N, M, VI <: InitialStates} return FactoredRobustMarkovDecisionProcess(state_vars, 
action_vars, state_vars, transition, initial_states)
@@ -122,4 +120,34 @@ action_shape(m::FactoredRobustMarkovDecisionProcess) = m.action_vars
 
 function Base.getindex(rmdp::FactoredRMDP, r)
     return rmdp.transition[r]
-end
\ No newline at end of file
+end
+
+### Model type analysis
+abstract type ModelType end
+
+abstract type NonFactored <: ModelType end
+struct IsIMDP <: NonFactored end # Interval MDP
+struct IsRMDP <: NonFactored end # Robust MDP
+
+abstract type Factored <: ModelType end
+struct IsFIMDP <: Factored end # Factored Interval MDP
+struct IsFPMDP <: Factored end # Factored Polytopic MDP
+struct IsFRMDP <: Factored end # Factored Robust MDP
+
+# Single marginal - special case
+modeltype(rmdp::FactoredRMDP{1}) = modeltype(rmdp, isinterval(rmdp.transition[1]))
+modeltype(::FactoredRMDP{1}, ::IsInterval) = IsIMDP()
+modeltype(::FactoredRMDP{1}, ::IsNotInterval) = IsRMDP()
+
+# General factored case
+
+# Check if all marginals are interval ambiguity sets
+modeltype(rmdp::FactoredRMDP{N}) where {N} = modeltype(rmdp, isinterval.(rmdp.transition))
+modeltype(::FactoredRMDP{N}, ::NTuple{N, IsInterval}) where {N} = IsFIMDP()
+
+# If not, check if all marginals are polytopic ambiguity sets
+modeltype(rmdp::FactoredRMDP{N}, ::NTuple{N, AbstractIsInterval}) where {N} = modeltype(rmdp, ispolytopic.(rmdp.transition))
+modeltype(::FactoredRMDP{N}, ::NTuple{N, IsPolytopic}) where {N} = IsFPMDP()
+
+# Otherwise, it is a general factored robust MDP
+modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsPolytopic}) where {N} = IsFRMDP()
diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl
index 1c0087c8..247eca35 100644
--- a/src/probabilities/Marginal.jl
+++ b/src/probabilities/Marginal.jl
@@ -1,4 +1,4 @@
-struct SARectangularMarginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndices} <: AbstractMarginal
+struct Marginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndices}
     ambiguity_sets::A
 
     state_indices::NTuple{N, Int32}
@@ -8,7 +8,7 @@ struct 
SARectangularMarginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndice action_vars::NTuple{M, Int32} linear_index::I - function SARectangularMarginal( + function Marginal( ambiguity_sets::A, state_indices::NTuple{N, Int32}, action_indices::NTuple{M, Int32}, @@ -21,7 +21,6 @@ struct SARectangularMarginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndice return new{A, N, M, typeof(linear_index)}(ambiguity_sets, state_indices, action_indices, source_dims, action_vars, linear_index) end end -const Marginal = SARectangularMarginal function Marginal( ambiguity_sets::A, @@ -36,7 +35,7 @@ function Marginal( source_dims_32 = Int32.(source_dims) action_vars_32 = Int32.(action_vars) - return SARectangularMarginal(ambiguity_sets, state_indices_32, action_indices_32, source_dims_32, action_vars_32) + return Marginal(ambiguity_sets, state_indices_32, action_indices_32, source_dims_32, action_vars_32) end function Marginal(ambiguity_sets::A, source_dims, action_vars) where {A <: AbstractAmbiguitySets} diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index 3ccf41d0..7360a9f9 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -1,6 +1,14 @@ +# Ambiguity sets abstract type AbstractAmbiguitySets end abstract type PolytopicAmbiguitySets <: AbstractAmbiguitySets end +abstract type AbstractIsPolytopic end +struct IsPolytopic <: AbstractIsPolytopic end +struct IsNotPolytopic <: AbstractIsPolytopic end + +ispolytopic(::AbstractAmbiguitySets) = IsNotPolytopic() +ispolytopic(::PolytopicAmbiguitySets) = IsPolytopic() + abstract type AbstractAmbiguitySet end abstract type PolytopicAmbiguitySet <: AbstractAmbiguitySet end @@ -23,10 +31,21 @@ export support include("IntervalAmbiguitySets.jl") export IntervalAmbiguitySets, lower, upper, gap -abstract type AbstractMarginal end +abstract type AbstractIsInterval end +struct IsInterval <: AbstractIsInterval end +struct IsNotInterval <: AbstractIsInterval end + 
+isinterval(::AbstractAmbiguitySets) = IsNotInterval() +isinterval(::IntervalAmbiguitySets) = IsInterval() + +# Marginals include("Marginal.jl") export SARectangularMarginal, Marginal, ambiguity_sets, state_variables, action_variables, source_shape, action_shape, num_target +ispolytopic(marginal::Marginal) = ispolytopic(ambiguity_sets(marginal)) +isinterval(marginal::Marginal) = isinterval(ambiguity_sets(marginal)) + +# DFA include("TransitionFunction.jl") export TransitionFunction, transition diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index 3248e3d7..dca57e08 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -11,7 +11,7 @@ end termination_criteria(prop, finitetime::Val{true}) = FixedIterationsCriteria(time_horizon(prop)) -struct CovergenceCriteria{T <: AbstractFloat} <: TerminationCriteria +struct CovergenceCriteria{T <: Real} <: TerminationCriteria tol::T end (f::CovergenceCriteria)(V, k, u) = maximum(abs, u) < f.tol diff --git a/src/utils.jl b/src/utils.jl index d2bc3d3a..b071c7ca 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,15 +1,14 @@ arrayfactory(mp::ProductProcess, T, num_states) = arrayfactory(markov_process(mp), T, num_states) arrayfactory(mp::FactoredRMDP, T, num_states) = - arrayfactory(mp.transition[1], T, num_states) + arrayfactory(marginals(mp)[1], T, num_states) arrayfactory(marginal::Marginal, T, num_states) = - arrayfactory(marginal.ambiguity_sets, T, num_states) + arrayfactory(ambiguity_sets(marginal), T, num_states) arrayfactory(prob::IntervalAmbiguitySets, T, num_states) = arrayfactory(prob.gap, T, num_states) arrayfactory(::MR, T, num_states) where {MR <: AbstractArray} = Array{T}(undef, num_states) valuetype(mp::ProductProcess) = valuetype(markov_process(mp)) -valuetype(mp::FactoredRMDP) = valuetype(mp.transition[1]) -valuetype(marginal::Marginal) = valuetype(marginal.ambiguity_sets) -valuetype(prob::IntervalAmbiguitySets) = valuetype(prob.gap) -valuetype(::MR) where {R, MR 
<: AbstractArray{R}} = R +valuetype(mp::FactoredRMDP) = promote_type(valuetype.(marginals(mp))...) +valuetype(marginal::Marginal) = valuetype(ambiguity_sets(marginal)) +valuetype(::IntervalAmbiguitySets{R}) where {R} = R diff --git a/src/workspace.jl b/src/workspace.jl index 817a981f..fb55e2f7 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -24,8 +24,21 @@ function construct_workspace(proc::ProductProcess, alg; kwargs...) return ProductWorkspace(underlying_workspace, intermediate_values) end +construct_workspace(mdp::FactoredRMDP, bellman_alg; kwargs...) = construct_workspace(mdp, modeltype(mdp), bellman_alg; kwargs...) + abstract type IMDPWorkspace end +function construct_workspace( + sys::FactoredRMDP, + ::IsIMDP, + ::OMaximization; + threshold = 10, + kwargs... +) + prob = ambiguity_sets(marginals(sys)[1]) + return construct_workspace(prob, OMaximization(); threshold = threshold, num_actions = num_actions(sys), kwargs...) +end + # Dense struct DenseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace budget::Vector{T} @@ -70,26 +83,12 @@ scratch(ws::ThreadedDenseIntervalOMaxWorkspace) = scratch(first(ws.thread_worksp function construct_workspace( prob::IntervalAmbiguitySets{R, MR}, ::OMaximization; - threshold = 10, kwargs... + threshold = 10, num_actions = 1, kwargs... ) where {R, MR <: AbstractMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold - return DenseIntervalOMaxWorkspace(prob, 1) - else - return ThreadedDenseIntervalOMaxWorkspace(prob, 1) - end -end - -function construct_workspace( - sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}, - ::OMaximization; - threshold = 10, - kwargs... 
-) where {N, M, R, MR <: AbstractMatrix{R}} - prob = sys.transition[1].ambiguity_sets - if Threads.nthreads() == 1 || num_states(sys) <= threshold - return DenseIntervalOMaxWorkspace(prob, num_actions(sys)) + return DenseIntervalOMaxWorkspace(prob, num_actions) else - return ThreadedDenseIntervalOMaxWorkspace(prob, num_actions(sys)) + return ThreadedDenseIntervalOMaxWorkspace(prob, num_actions) end end @@ -129,25 +128,13 @@ function construct_workspace( prob::IntervalAmbiguitySets{R, MR}, ::OMaximization; threshold = 10, + num_actions = 1, kwargs... ) where {R, MR <: AbstractSparseMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold - return SparseIntervalOMaxWorkspace(prob, 1) - else - return ThreadedSparseIntervalOMaxWorkspace(prob, 1) - end -end - -function construct_workspace( - sys::FactoredRMDP{N, M, <:Tuple{<:Marginal{<:IntervalAmbiguitySets{R, MR}}}}, - ::OMaximization; - threshold = 10, -) where {N, M, R, MR <: AbstractSparseMatrix{R}} - prob = sys.transition[1].ambiguity_sets - if Threads.nthreads() == 1 || num_states(sys) <= threshold - return SparseIntervalOMaxWorkspace(prob, num_actions(sys)) + return SparseIntervalOMaxWorkspace(prob, num_actions) else - return ThreadedSparseIntervalOMaxWorkspace(prob, num_actions(sys)) + return ThreadedSparseIntervalOMaxWorkspace(prob, num_actions) end end @@ -179,11 +166,12 @@ end Base.getindex(ws::ThreadedFactoredIntervalMcCormickWorkspace, i) = ws.thread_workspaces[i] function construct_workspace( - sys::FactoredRMDP{N, M, <:NTuple{N, <:Marginal{<:IntervalAmbiguitySets}}}, + sys::FactoredRMDP, + ::Union{IsFIMDP, IsIMDP}, alg::LPMcCormickRelaxation; threshold = 10, kwargs... 
-) where {N, M} +) if Threads.nthreads() == 1 || num_states(sys) <= threshold return FactoredIntervalMcCormickWorkspace(sys, alg) else diff --git a/test/base/bellman.jl b/test/base/bellman.jl index e66e3eeb..a5f85a90 100644 --- a/test/base/bellman.jl +++ b/test/base/bellman.jl @@ -1,97 +1,95 @@ using Revise, Test using IntervalMDP -for N in [Float32, Float64, Rational{BigInt}] - @testset "N = $N" begin - prob = IntervalAmbiguitySets(; - lower = N[0 1//2; 1//10 3//10; 2//10 1//10], - upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], - ) +@testset for N in [Float32, Float64, Rational{BigInt}] + prob = IntervalAmbiguitySets(; + lower = N[0 1//2; 1//10 3//10; 2//10 1//10], + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], + ) - V = N[1, 2, 3] + V = N[1, 2, 3] - #### Maximization - @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) - @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] + #### Maximization + @testset "maximization" begin + ws = IntervalMDP.construct_workspace(prob, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = zeros(N, 2) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) - @test Vres ≈ N[27 // 10, 17 // 10] + ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + 
ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + @test Vres ≈ N[27 // 10, 17 // 10] - ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) - @test Vres ≈ N[27 // 10, 17 // 10] - end + ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + @test Vres ≈ N[27 // 10, 17 // 10] + end - #### Minimization - @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) - @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] + #### Minimization + @testset "minimization" begin + ws = IntervalMDP.construct_workspace(prob, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = zeros(N, 2) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = false, + ) + @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] - ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) - @test Vres ≈ N[17 // 10, 15 // 10] + ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + 
upper_bound = false, + ) + @test Vres ≈ N[17 // 10, 15 // 10] - ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) - @test Vres ≈ N[17 // 10, 15 // 10] - end + ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = false, + ) + @test Vres ≈ N[17 // 10, 15 // 10] end end diff --git a/test/base/factored.jl b/test/base/factored.jl index 5f862fef..55be37b3 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -2,1063 +2,1044 @@ using Revise, Test using IntervalMDP using Random: MersenneTwister -for N in [Float32, Float64] - @testset "N = $N" begin - @testset "bellman 1d" begin - ambiguity_sets = IntervalAmbiguitySets(; - lower = N[ - 0 5//10 2//10 - 1//10 3//10 3//10 - 2//10 1//10 5//10 - ], - upper = N[ - 5//10 7//10 3//10 - 6//10 5//10 4//10 - 7//10 3//10 5//10 - ], +@testset for N in [Float32, Float64] + @testset "bellman 1d" begin + ambiguity_sets = IntervalAmbiguitySets(; + lower = N[ + 0 5//10 2//10 + 1//10 3//10 3//10 + 2//10 1//10 5//10 + ], + upper = N[ + 5//10 7//10 3//10 + 6//10 5//10 4//10 + 7//10 3//10 5//10 + ], + ) + imc = IntervalMarkovChain(ambiguity_sets) + + V = N[1, 2, 3] + + @testset "maximization" begin + Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent + + ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, ) - imc = IntervalMarkovChain(ambiguity_sets) - - V = N[1, 2, 3] - - @testset "maximization" begin - Vexpected = 
N[27 // 10, 17 // 10, 23 // 10] - - ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(imc) - Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(imc) - Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(imc) - Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) - @test Vres ≈ Vexpected - end + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + end - @testset "minimization" begin - Vexpected = N[17 // 10, 15 // 10, 23 // 10] - - ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(imc) - Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) - strategy_cache = 
IntervalMDP.construct_strategy_cache(imc) - Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) - @test Vres ≈ Vexpected - - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(imc) - Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) - @test Vres ≈ Vexpected - end + @testset "minimization" begin + Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent + + ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected end + end + + @testset "bellman 2d" begin + state_indices = (1, 2) + action_indices = (1,) + state_vars = (2, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + 
lower = N[ + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + + V = N[ + 3 13 18 + 12 16 8 + ] + eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "bellman 2d partial dependence" begin + state_vars = (2, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ] + ), (1, 2), (1,), (2, 3), (1,)) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//30 1//3 1//6 + 4//15 1//4 1//6 + 2//15 7//30 1//10 + ], + upper = N[ + 2//3 7//15 4//5 + 23//30 4//5 23//30 + 7//15 4//5 23//30 + ] + ), (2,), (1,), (3,), (1,)) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + + V = N[ + 3 13 18 + 12 16 8 + ] + eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end - @testset "bellman 2d" begin - state_indices = (1, 2) + @testset "bellman 3d" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ] + ), 
state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 
8//15 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + V = N[ + 23, + 27, + 16, + 6, + 26, + 17, + 12, + 9, + 8, + 22, + 1, + 21, + 11, + 24, + 4, + 10, + 13, + 19, + 3, + 14, + 25, + 20, + 18, + 7, + 5, + 15, + 2, + ] + V = reshape(V, 3, 3, 3) + eval_vertices(p, q, r) = sum(V[I] * p[I[1]] * q[I[2]] * r[I[3]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "implicit sink state" begin + @testset "first dimension" begin + state_indices = (1, 2, 3) action_indices = (1,) - state_vars = (2, 3) + state_vars = (3, 3, 3) + source_dims = (2, 3, 3) action_vars = (1,) + # Explicit marginal1 = Marginal(IntervalAmbiguitySets(; lower = N[ - 1//15 7//30 1//15 13//30 4//15 1//6 - 2//5 7//30 1//30 11//30 2//15 1//10 + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 ], upper = N[ - 17//30 7//10 2//3 4//5 7//10 2//3 - 9//10 13//15 9//10 5//6 4//5 14//15 + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 ] ), state_indices, action_indices, state_vars, 
action_vars) marginal2 = Marginal(IntervalAmbiguitySets(; lower = N[ - 1//30 1//3 1//6 1//15 2//5 2//15 - 4//15 1//4 1//6 1//30 2//15 1//30 - 2//15 7//30 1//10 7//30 7//15 1//5 + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 ], upper = N[ - 2//3 7//15 4//5 11//30 19//30 1//2 - 23//30 4//5 23//30 3//5 7//10 8//15 - 7//15 4//5 23//30 7//10 7//15 23//30 + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 ] ), state_indices, action_indices, state_vars, action_vars) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ], + upper = N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 
13//30 1 + ] + ), state_indices, action_indices, state_vars, action_vars) - V = N[ - 3 13 18 - 12 16 8 - ] + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - #### Maximization - @testset "maximization" begin - V_vertex = N[ - 14.346666666666664 14.263333333333334 11.133333333333336 - 12.341111111111111 13.74333333333333 13.444444444444443 + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 ] + ), state_indices, action_indices, source_dims, action_vars) - ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres_first, - V, - mdp; - upper_bound = true, - ) + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 
17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) - epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) - - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) - @test Vres ≈ Vres_first - - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) - @test Vres ≈ Vres_first - end + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - #### Minimization - @testset "minimization" begin - V_vertex = N[ - 9.775555555555554 8.200000000000001 10.844444444444443 - 10.33 10.86 10.027777777777777 - ] + prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) - ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres_first, - V, - mdp; - upper_bound = false, - ) + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) - epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) - - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) - @test Vres ≈ Vres_first - - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) - @test Vres ≈ Vres_first - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit end - # @testset "bellman 2d partial dependence" begin - # state_indices = (1, 2) - # action_indices = (1,) - # state_vars = (2, 3) - # action_vars = (1,) - - # marginal1 = Marginal(IntervalAmbiguitySets(; - # lower = N[ - # 1//15 7//30 1//15 13//30 4//15 1//6 - # 2//5 7//30 1//30 11//30 2//15 1//10 - # ], - # upper = N[ - # 17//30 7//10 2//3 4//5 7//10 2//3 - # 9//10 13//15 9//10 5//6 4//5 14//15 - # ] - # ), state_indices, action_indices, state_vars, action_vars) - - # marginal2 = Marginal(IntervalAmbiguitySets(; - # lower = N[ - # 1//30 1//3 1//6 - # 4//15 1//4 1//6 - # 2//15 7//30 1//10 - # ], - # upper = N[ - # 2//3 7//15 4//5 - # 23//30 4//5 23//30 - # 7//15 4//5 23//30 - # ] - # ), (2,), action_indices, (3,), action_vars) - - # mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) - - # V = N[ - # 3 13 18 - # 12 16 8 - # ] - - # #### Maximization - # @testset "maximization" begin - # V_vertex = N[ - # 14.346666666666664 14.263333333333334 11.133333333333336 - # 12.341111111111111 13.74333333333333 13.444444444444443 - # ] - - # ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) - # strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) - # Vres_first = zeros(N, 2, 3) - # IntervalMDP.bellman!( - # ws, - # strategy_cache, - # Vres_first, - # V, - # mdp; - # upper_bound = true, - # ) - - # epsilon = N == Float32 ? 1e-5 : 1e-8 - # @test all(Vres_first .>= 0.0) - # @test all(Vres_first .<= maximum(V)) - # @test all(Vres_first .+ epsilon .>= V_vertex) - - # ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - # Vres = similar(Vres_first) - # IntervalMDP.bellman!( - # ws, - # strategy_cache, - # Vres, - # V, - # mdp; - # upper_bound = true, - # ) - # @test Vres ≈ Vres_first - - # ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - # Vres = similar(Vres_first) - # IntervalMDP.bellman!( - # ws, - # strategy_cache, - # Vres, - # V, - # mdp; - # upper_bound = true, - # ) - # @test Vres ≈ Vres_first - # end - - # #### Minimization - # @testset "minimization" begin - # V_vertex = N[ - # 9.775555555555554 8.200000000000001 10.844444444444443 - # 10.33 10.86 10.027777777777777 - # ] - - # ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) - # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - # Vres_first = zeros(N, 2, 3) - # IntervalMDP._bellman_helper!( - # ws, - # strategy_cache, - # Vres_first, - # V, - # mdp; - # upper_bound = false, - # ) - - # epsilon = N == Float32 ? 
1e-5 : 1e-8 - # @test all(Vres_first .>= 0.0) - # @test all(Vres_first .<= maximum(V)) - # @test all(Vres_first .- epsilon .<= V_vertex) - - # ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - # Vres = similar(Vres_first) - # IntervalMDP.bellman!( - # ws, - # strategy_cache, - # Vres, - # V, - # mdp; - # upper_bound = false, - # ) - # @test Vres ≈ Vres_first - - # ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - # strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - # Vres = similar(Vres_first) - # IntervalMDP.bellman!( - # ws, - # strategy_cache, - # Vres, - # V, - # mdp; - # upper_bound = false, - # ) - # @test Vres ≈ Vres_first - # end - # end - - @testset "bellman 3d" begin + @testset "second dimension" begin state_indices = (1, 2, 3) action_indices = (1,) state_vars = (3, 3, 3) + source_dims = (3, 2, 3) action_vars = (1,) + + # Explicit marginal1 = Marginal(IntervalAmbiguitySets(; lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 ], upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 
13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 ] ), state_indices, action_indices, state_vars, action_vars) - + marginal2 = Marginal(IntervalAmbiguitySets(; lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 ], upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 
8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 ] ), state_indices, action_indices, state_vars, action_vars) marginal3 = Marginal(IntervalAmbiguitySets(; lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 ], upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 
7//15 7//15 + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 ] ), state_indices, action_indices, state_vars, action_vars) mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - V = N[ - 23, - 27, - 16, - 6, - 26, - 17, - 12, - 9, - 8, - 22, - 1, - 21, - 11, - 24, - 4, - 10, - 13, - 19, - 3, - 14, - 25, - 20, - 18, - 7, - 5, - 15, - 2, - ] - V = reshape(V, 3, 3, 3) - - #### Maximization - @testset "maximization" begin - V_vertex = N[ - 16.19533333333333, - 15.225999999999996, - 17.999333333333325, - 15.795888888888884, - 17.75407407407408, - 14.759111111111114, - 16.94551851851852, - 15.592148148148148, - 15.816333333333333, - 15.059555555555557, - 16.611333333333334, - 16.774814814814814, - 18.133333333333333, - 17.964999999999996, - 17.491666666666664, - 15.506666666666668, - 16.986962962962956, - 14.952518518518515, - 18.215555555555554, - 16.101592592592596, - 17.483888888888895, - 17.05688888888889, - 16.394444444444442, - 16.340666666666667, - 16.880444444444446, - 16.045185185185186, - 16.494074074074074, - ] - V_vertex = reshape(V_vertex, (3, 3, 3)) - - ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres_first, - V, - mdp; - upper_bound = true, - ) + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 
1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) - epsilon = N == Float32 ? 1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) - - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) - @test Vres ≈ Vres_first - - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) - @test Vres ≈ Vres_first - end + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), 
state_indices, action_indices, source_dims, action_vars) - #### Minimization - @testset "minimization" begin - V_vertex = N[ - 10.856370370370371, - 11.341333333333333, - 12.446333333333333, - 11.282999999999996, - 12.351592592592592, - 12.094370370370369, - 9.957037037037036, - 9.98859259259259, - 12.769888888888888, - 12.433333333333332, - 12.526444444444442, - 11.579851851851851, - 13.808888888888886, - 11.394074074074076, - 13.171555555555557, - 12.11111111111111, - 12.080148148148147, - 11.569777777777778, - 13.288888888888888, - 11.581629629629631, - 11.545259259259257, - 10.001851851851853, - 11.602074074074073, - 12.530444444444445, - 12.070666666666666, - 10.503851851851852, - 12.760740740740742, + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 ] - V_vertex = reshape(V_vertex, (3, 3, 3)) - - ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres_first, - V, - mdp; - upper_bound = false, - ) + ), state_indices, action_indices, source_dims, action_vars) - epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) - - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) - @test Vres ≈ Vres_first - - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) - strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) - @test Vres ≈ Vres_first - end - end + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - @testset "implicit sink state" begin - @testset "first dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (2, 3, 3) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 - 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 - 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 - ], - upper = N[ - 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 - 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 - 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = 
Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 - 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 - 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 - ], - upper = N[ - 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 - 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 - 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 - 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 - 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 - ], - upper = N[ - 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 - 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 - 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 
7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 
8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) - @testset "second dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (3, 2, 3) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 - ] - ), state_indices, action_indices, state_vars, 
action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 
4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 
11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) - @testset "last dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (3, 3, 2) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 
1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 
3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 
1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit end - end -end -for N in [Float32, Float64] - @testset "N = $N" begin - # 4-D abstraction - @testset "4D abstraction" begin - rng = MersenneTwister(995) - - prob_lower = [rand(rng, N, 3, 81) ./ N(3) for _ in 1:4] - prob_upper = [(rand(rng, N, 3, 81) .+ N(1)) ./ N(3) for _ in 1:4] - - ambiguity_sets = ntuple( - i -> IntervalAmbiguitySets(; - lower = prob_lower[i], - upper = prob_upper[i], - ), - 4, - ) + @testset "last dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 3, 2) + action_vars = (1,) - marginals = ntuple( - i -> Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 3, 3), (1,)), - 4, - ) + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 
17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ] + ), state_indices, action_indices, state_vars, action_vars) - mdp = FactoredRobustMarkovDecisionProcess((3, 3, 3, 3), (1,), marginals) + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 
8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 
2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) - prop = FiniteTimeReachability([(3, 3, 3, 3)], 10) + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) - V_ortho, it_ortho, res_ortho = solve(prob) + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) - @test V_ortho[3, 3, 3, 3] ≈ one(N) - @test all(V_ortho .>= zero(N)) - @test all(V_ortho .<= one(N)) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + end +end - # Test against the naive construction - prob_lower_simple = zeros(N, 81, 81) - prob_upper_simple = zeros(N, 81, 81) +@testset for N in [Float32, Float64] + # 4-D abstraction + @testset "4D abstraction" begin + rng = MersenneTwister(995) - lin = LinearIndices((3, 3, 3, 3)) - act_idx = CartesianIndex(1) - for I in CartesianIndices((3, 3, 3, 3)) - for J in CartesianIndices((3, 3, 3, 3)) - marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) + prob_lower = [rand(rng, N, 3, 81) ./ N(3) for _ in 1:4] + prob_upper = [(rand(rng, N, 3, 81) .+ N(1)) ./ N(3) for _ in 1:4] - prob_lower_simple[lin[J], lin[I]] = prod( - lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) + 
ambiguity_sets = ntuple( + i -> IntervalAmbiguitySets(; + lower = prob_lower[i], + upper = prob_upper[i], + ), + 4, + ) - prob_upper_simple[lin[J], lin[I]] = prod( - upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) - end - end + marginals = ntuple( + i -> Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 3, 3), (1,)), + 4, + ) - ambiguity_set = IntervalAmbiguitySets(; - lower = prob_lower_simple, - upper = prob_upper_simple, - ) + mdp = FactoredRobustMarkovDecisionProcess((3, 3, 3, 3), (1,), marginals) - imc = IntervalMarkovChain(ambiguity_set) + prop = FiniteTimeReachability([(3, 3, 3, 3)], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) - prop = FiniteTimeReachability([81], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(imc, spec) + V_ortho, it_ortho, res_ortho = solve(prob) - V_direct, it_direct, res_direct = solve(prob) - @test V_direct[81] ≈ one(N) - @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) - end + @test V_ortho[3, 3, 3, 3] ≈ one(N) + @test all(V_ortho .>= zero(N)) + @test all(V_ortho .<= one(N)) - @testset "synthesis" begin - rng = MersenneTwister(3286) + # Test against the naive construction + prob_lower_simple = zeros(N, 81, 81) + prob_upper_simple = zeros(N, 81, 81) - num_states_per_axis = 3 - num_axis = 3 - num_states = num_states_per_axis^num_axis - num_actions = 2 - num_choices = num_states * num_actions - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = ntuple(_ -> num_states_per_axis, num_axis) - action_vars = (num_actions,) + lin = LinearIndices((3, 3, 3, 3)) + act_idx = CartesianIndex(1) + for I in CartesianIndices((3, 3, 3, 3)) + for J in CartesianIndices((3, 3, 3, 3)) + marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) - prob_lower = [ - rand(rng, N, num_states_per_axis, num_choices) ./ num_states_per_axis - for _ in 1:num_axis - ] - prob_upper = [ - (rand(rng, N, num_states_per_axis, num_choices) 
.+ N(1)) ./ - num_states_per_axis for _ in 1:num_axis - ] + prob_lower_simple[lin[J], lin[I]] = prod( + lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) - ambiguity_sets = ntuple( - i -> IntervalAmbiguitySets(; - lower = prob_lower[i], - upper = prob_upper[i], - ), - num_axis, - ) + prob_upper_simple[lin[J], lin[I]] = prod( + upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) + end + end - marginals = ntuple( - i -> Marginal(ambiguity_sets[i], state_indices, action_indices, state_vars, action_vars), - num_axis, - ) + ambiguity_set = IntervalAmbiguitySets(; + lower = prob_lower_simple, + upper = prob_upper_simple, + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, marginals) + imc = IntervalMarkovChain(ambiguity_set) - prop = FiniteTimeReachability( - [(num_states_per_axis, num_states_per_axis, num_states_per_axis)], - 10, - ) - spec = Specification(prop, Pessimistic, Maximize) - prob = ControlSynthesisProblem(mdp, spec) + prop = FiniteTimeReachability([81], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(imc, spec) - policy, V, it, res = solve(prob) - @test it == 10 - @test all(V .≥ 0.0) + V_direct, it_direct, res_direct = solve(prob) + @test V_direct[81] ≈ one(N) + @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) + end - # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP - prob = VerificationProblem(mdp, spec, policy) - V_mc, k, res = solve(prob) - @test V ≈ V_mc - end + @testset "synthesis" begin + rng = MersenneTwister(3286) + + num_states_per_axis = 3 + num_axis = 3 + num_states = num_states_per_axis^num_axis + num_actions = 2 + num_choices = num_states * num_actions + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = ntuple(_ -> num_states_per_axis, num_axis) + action_vars = (num_actions,) + + prob_lower = [ + rand(rng, N, num_states_per_axis, num_choices) ./ num_states_per_axis + for _ in 
1:num_axis + ] + prob_upper = [ + (rand(rng, N, num_states_per_axis, num_choices) .+ N(1)) ./ + num_states_per_axis for _ in 1:num_axis + ] + + ambiguity_sets = ntuple( + i -> IntervalAmbiguitySets(; + lower = prob_lower[i], + upper = prob_upper[i], + ), + num_axis, + ) + + marginals = ntuple( + i -> Marginal(ambiguity_sets[i], state_indices, action_indices, state_vars, action_vars), + num_axis, + ) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, marginals) + + prop = FiniteTimeReachability( + [(num_states_per_axis, num_states_per_axis, num_states_per_axis)], + 10, + ) + spec = Specification(prop, Pessimistic, Maximize) + prob = ControlSynthesisProblem(mdp, spec) + + policy, V, it, res = solve(prob) + @test it == 10 + @test all(V .≥ 0.0) + + # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP + prob = VerificationProblem(mdp, spec, policy) + V_mc, k, res = solve(prob) + @test V ≈ V_mc end end diff --git a/test/base/imdp.jl b/test/base/imdp.jl index c39a7463..1b94a3f2 100644 --- a/test/base/imdp.jl +++ b/test/base/imdp.jl @@ -1,627 +1,635 @@ using Revise, Test using IntervalMDP -prob1 = IntervalAmbiguitySets(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], -) - -prob2 = IntervalAmbiguitySets(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], -) - -prob3 = IntervalAmbiguitySets(; - lower = [ - 0.0 0.0 - 0.0 0.0 - 1.0 1.0 - ], - upper = [ - 0.0 0.0 - 0.0 0.0 - 1.0 1.0 - ] -) - -transition_probs = [prob1, prob2, prob3] -istates = [1] - -mdp = IntervalMarkovDecisionProcess(transition_probs, istates) -@test initial_states(mdp) == istates - -mdp = IntervalMarkovDecisionProcess(transition_probs) - -@testset "bellman" begin - V = [1.0, 2.0, 3.0] - Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) - @test Vres ≈ [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.3 * 1 + 0.3 * 2 + 0.4 * 
3, 1.0 * 3] - - Vres = similar(Vres) - IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) - @test Vres ≈ [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.3 * 1 + 0.3 * 2 + 0.4 * 3, 1.0 * 3] -end - -@testset "explicit sink state" begin - transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) - @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) - - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 +@testset for N in [Float32, Float64, Rational{BigInt}] + prob1 = IntervalAmbiguitySets(; + lower = N[ + 0 1//2 + 1//10 3//10 + 1//5 1//10 + ], + upper = N[ + 1//2 7//10 + 3//5 1//2 + 7//10 3//10 + ], + ) + + prob2 = IntervalAmbiguitySets(; + lower = N[ + 1//10 1//5 + 1//5 3//10 + 3//10 2//5 + ], + upper = N[ + 3//5 3//5 + 1//2 1//2 + 2//5 2//5 + ], + ) + + prob3 = IntervalAmbiguitySets(; + lower = N[ + 0 0 + 0 0 + 1 1 + ], + upper = N[ + 0 0 + 0 0 + 1 1 + ] + ) + + transition_probs = [prob1, prob2, prob3] + istates = [1] + + mdp = IntervalMarkovDecisionProcess(transition_probs, istates) + @test initial_states(mdp) == istates + + 
mdp = IntervalMarkovDecisionProcess(transition_probs) + + @testset "bellman" begin + V = N[1, 2, 3] + Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + + Vres = similar(Vres) + IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] end - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - @test V_conv[3] == 1.0 + @testset "explicit sink state" begin + transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) + @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) + + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + end + + # Infinite 
time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test V_conv[3] == N(1) + end + + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + # Compare exact time to finite time + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k 
== 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + end + + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + @test V_conv[3] == N(1) + @test V_conv[2] == N(0) + end + + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = 
VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + # Compare exact time to finite time + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reward + @testset "finite time reward" begin + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Infinite time reward + @testset "infinite time reward" begin + prop = InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= 
N(1//1_000_000) + @test all(V_conv .>= N(0)) + end + + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + end end - # Exact time reachability - @testset "exact time reachability" begin - prop = ExactTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - # Compare exact time to finite time - prop = 
ExactTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - - prop = FiniteTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end - - # Finite time reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - @test V_fixed_it1[2] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 - @test V_fixed_it2[2] == 0.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - @test V_fixed_it1[2] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 - @test V_fixed_it2[2] == 0.0 - end - - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - @test V_conv[3] == 1.0 - @test V_conv[2] == 0.0 - end - - # Exact time reach avoid - @testset "exact time reach/avoid" begin - prop = ExactTimeReachAvoid([3], [2], 
10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[2] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[2] == 0.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[2] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[2] == 0.0 - - # Compare exact time to finite time - prop = ExactTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - - prop = FiniteTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end - - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 
.>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end - - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - end + @testset "implicit sink state" begin + transition_probs = [prob1, prob2] + implicit_mdp = IntervalMarkovDecisionProcess(transition_probs) - # Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - end -end + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) -@testset "implicit sink state" begin - transition_probs = [prob1, prob2] - implicit_mdp = IntervalMarkovDecisionProcess(transition_probs) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # 
Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit 
- @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + # Infinite time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Exact time reachability - @testset "exact time reachability" begin - prop = ExactTimeReachability([3], 10) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = 
Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Minimize) - problem = 
VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Finite time reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + 
problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = 
solve(problem_implicit) + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Exact time reach avoid - @testset "exact time reach/avoid" begin - prop = ExactTimeReachAvoid([3], [2], 10) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + 
@test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + # Finite time reward + @testset "finite time reward" begin + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end - - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Optimistic, Maximize) - problem = 
VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + # Infinite time reward + @testset "infinite time reward" begin + prop = InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, 
k, res = solve(problem) - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - # Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end end -end +end \ No newline at end of file diff --git a/test/base/vi.jl b/test/base/vi.jl index 41974f04..701cfad1 100644 --- a/test/base/vi.jl +++ b/test/base/vi.jl @@ -1,117 +1,127 @@ using Revise, Test using IntervalMDP -prob = IntervalAmbiguitySets(; - lower = [ - 0.0 0.5 0.0 - 0.1 0.3 0.0 - 0.2 0.1 1.0 - ], - upper = [ - 0.5 0.7 0.0 - 0.6 0.5 0.0 - 0.7 0.3 1.0 - ], -) - -mc = IntervalMarkovChain(prob, [1]) -@test initial_states(mc) == [1] - -mc = 
IntervalMarkovChain(prob) - -prop = FiniteTimeReachability([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -sol = solve(problem) -V_fixed_it, k, res = sol -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -@test value_function(sol) == V_fixed_it -@test num_iterations(sol) == k -@test residual(sol) == res - -prop = FiniteTimeReachability([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachability([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeReachAvoid([3], [2], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReachAvoid([3], [2], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachAvoid([3], [2], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeSafety([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeSafety([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = InfiniteTimeSafety([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, 
spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReward([2.0, 1.0, -1.0], 0.9, 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = InfiniteTimeReward([2.0, 1.0, -1.0], 0.9, 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 +@testset for N in [Float32, Float64, Rational{BigInt}] + prob = IntervalAmbiguitySets(; + lower = N[ + 0 1//2 0 + 1//10 3//10 0 + 1//5 1//10 1 + ], + upper = N[ + 1//2 7//10 0 + 3//5 1//2 0 + 7//10 3//10 1 + ], + ) + + mc = IntervalMarkovChain(prob, [1]) + @test initial_states(mc) == [1] + + mc = IntervalMarkovChain(prob) + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + sol = solve(problem) + V_fixed_it, k, res = sol + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + @test value_function(sol) == V_fixed_it + @test num_iterations(sol) == k + @test residual(sol) == res + + prop = FiniteTimeReachability([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 11 + @test all(V_fixed_it .<= V_fixed_it2) + + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, 
spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + prop = FiniteTimeReachAvoid([3], [2], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it .<= V_fixed_it2) + + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeSafety([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + prop = FiniteTimeSafety([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it2 .<= V_fixed_it) + + prop = InfiniteTimeSafety([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it .>= N(0)) + + prop = FiniteTimeReward(N[2, 1, -1], 
N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it2 .<= V_fixed_it) + + prop = InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + + prop = InfiniteTimeReward(N[2, 1, -1], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) +end \ No newline at end of file diff --git a/test/data/bmdp_tool.jl b/test/data/bmdp_tool.jl index 063d341c..2c043b23 100644 --- a/test/data/bmdp_tool.jl +++ b/test/data/bmdp_tool.jl @@ -5,7 +5,7 @@ using IntervalMDP, IntervalMDP.Data, SparseArrays mdp, tstates = read_bmdp_tool_file("data/multiObj_robotIMDP.txt") marginal = marginals(mdp)[1] -ambiguity_sets = marginal.ambiguity_sets +as = ambiguity_sets(marginal) @testset "write/read model,tstates" begin # Write model @@ -22,7 +22,7 @@ ambiguity_sets = marginal.ambiguity_sets @test num_states(mdp) == num_states(new_mdp) new_marginal = marginals(new_mdp)[1] - new_ambiguity_sets = new_marginal.ambiguity_sets + new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @@ -30,8 +30,8 @@ ambiguity_sets = marginal.ambiguity_sets @test state_variables(mdp) == state_variables(new_mdp) @test action_variables(mdp) == action_variables(new_mdp) - @test ambiguity_sets.lower ≈ new_ambiguity_sets.lower - @test ambiguity_sets.gap ≈ new_ambiguity_sets.gap + @test as.lower ≈ new_as.lower + @test as.gap ≈ new_as.gap @test tstates == new_tstates end @@ -55,7 +55,7 @@ end @test num_states(mdp) == num_states(new_mdp) new_marginal = marginals(new_mdp)[1] - new_ambiguity_sets = 
new_marginal.ambiguity_sets + new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @@ -63,8 +63,8 @@ end @test state_variables(mdp) == state_variables(new_mdp) @test action_variables(mdp) == action_variables(new_mdp) - @test ambiguity_sets.lower ≈ new_ambiguity_sets.lower - @test ambiguity_sets.gap ≈ new_ambiguity_sets.gap + @test as.lower ≈ new_as.lower + @test as.gap ≈ new_as.gap @test tstates == new_tstates end \ No newline at end of file diff --git a/test/data/intervalmdp.jl b/test/data/intervalmdp.jl index 0f3e610d..c97ba1aa 100644 --- a/test/data/intervalmdp.jl +++ b/test/data/intervalmdp.jl @@ -21,8 +21,8 @@ write_intervalmdp_jl_model("data/multiObj_robotIMDP.nc", mdp) marginal = marginals(mdp)[1] new_marginal = marginals(new_mdp)[1] - ambiguity_sets = marginal.ambiguity_sets - new_ambiguity_sets = new_marginal.ambiguity_sets + as = ambiguity_sets(marginal) + new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @@ -30,8 +30,8 @@ write_intervalmdp_jl_model("data/multiObj_robotIMDP.nc", mdp) @test state_variables(mdp) == state_variables(new_mdp) @test action_variables(mdp) == action_variables(new_mdp) - @test ambiguity_sets.lower ≈ new_ambiguity_sets.lower - @test ambiguity_sets.gap ≈ new_ambiguity_sets.gap + @test as.lower ≈ new_as.lower + @test as.gap ≈ new_as.gap end @testset "io specification" begin diff --git a/test/data/prism.jl b/test/data/prism.jl index 01a466dd..19657442 100644 --- a/test/data/prism.jl +++ b/test/data/prism.jl @@ -38,8 +38,8 @@ mdp, new_mdp = system(problem), system(new_problem) marginal = marginals(mdp)[1] new_marginal = marginals(new_mdp)[1] -ambiguity_sets = marginal.ambiguity_sets -new_ambiguity_sets = new_marginal.ambiguity_sets +as = ambiguity_sets(marginal) +new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) 
== source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @@ -47,8 +47,8 @@ new_ambiguity_sets = new_marginal.ambiguity_sets @test state_variables(mdp) == state_variables(new_mdp) @test action_variables(mdp) == action_variables(new_mdp) -@test ambiguity_sets.lower ≈ new_ambiguity_sets.lower -@test ambiguity_sets.gap ≈ new_ambiguity_sets.gap +@test as.lower ≈ new_as.lower +@test as.gap ≈ new_as.gap spec = specification(new_problem) @test satisfaction_mode(spec) == Pessimistic @@ -99,8 +99,8 @@ mdp, new_mdp = system(problem), system(new_problem) marginal = marginals(mdp)[1] new_marginal = marginals(new_mdp)[1] -ambiguity_sets = marginal.ambiguity_sets -new_ambiguity_sets = new_marginal.ambiguity_sets +as = ambiguity_sets(marginal) +new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @@ -108,8 +108,8 @@ new_ambiguity_sets = new_marginal.ambiguity_sets @test state_variables(mdp) == state_variables(new_mdp) @test action_variables(mdp) == action_variables(new_mdp) -@test ambiguity_sets.lower ≈ new_ambiguity_sets.lower -@test ambiguity_sets.gap ≈ new_ambiguity_sets.gap +@test as.lower ≈ new_as.lower +@test as.gap ≈ new_as.gap spec = specification(new_problem) @test satisfaction_mode(spec) == Pessimistic @@ -156,8 +156,8 @@ new_mdp = system(new_problem) marginal = marginals(mdp)[1] new_marginal = marginals(new_mdp)[1] -ambiguity_sets = marginal.ambiguity_sets -new_ambiguity_sets = new_marginal.ambiguity_sets +as = ambiguity_sets(marginal) +new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @@ -165,8 +165,8 @@ new_ambiguity_sets = new_marginal.ambiguity_sets @test state_variables(mdp) == state_variables(new_mdp) @test action_variables(mdp) == action_variables(new_mdp) -@test ambiguity_sets.lower ≈ new_ambiguity_sets.lower -@test 
ambiguity_sets.gap ≈ new_ambiguity_sets.gap +@test as.lower ≈ new_as.lower +@test as.gap ≈ new_as.gap spec = specification(new_problem) @test satisfaction_mode(spec) == Pessimistic From 5270528495f83515354d912cc8e24865d68415fa Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 11 Sep 2025 13:38:54 +0200 Subject: [PATCH 09/71] Test sparse --- test/base/factored.jl | 4 +- test/sparse/bellman.jl | 181 +++--- test/sparse/factored.jl | 1115 +++++++++++++++++++++++++++++++++++++ test/sparse/imdp.jl | 918 ++++++++++++++++++------------ test/sparse/orthogonal.jl | 248 --------- test/sparse/sparse.jl | 2 +- test/sparse/vi.jl | 233 ++++---- 7 files changed, 1903 insertions(+), 798 deletions(-) create mode 100644 test/sparse/factored.jl delete mode 100644 test/sparse/orthogonal.jl diff --git a/test/base/factored.jl b/test/base/factored.jl index 55be37b3..b821953c 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -141,7 +141,7 @@ using Random: MersenneTwister mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) V = N[ - 3 13 18 + 3 13 18 12 16 8 ] eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) @@ -917,9 +917,7 @@ using Random: MersenneTwister @test res ≈ res_implicit end end -end -@testset for N in [Float32, Float64] # 4-D abstraction @testset "4D abstraction" begin rng = MersenneTwister(995) diff --git a/test/sparse/bellman.jl b/test/sparse/bellman.jl index 528f9468..eaf23363 100644 --- a/test/sparse/bellman.jl +++ b/test/sparse/bellman.jl @@ -1,103 +1,102 @@ using Revise, Test using IntervalMDP, SparseArrays -for N in [Float32, Float64, Rational{BigInt}] - @testset "N = $N" begin - prob = IntervalAmbiguitySets(; - lower = sparse_hcat( - SparseVector(15, [4, 10], N[1 // 10, 2 // 10]), - SparseVector(15, [5, 6, 7], N[5 // 10, 3 // 10, 1 // 10]), - ), - upper = sparse_hcat( - SparseVector(15, [1, 4, 10], N[5 // 10, 6 // 10, 7 // 10]), - SparseVector(15, [5, 6, 7], 
N[7 // 10, 5 // 10, 3 // 10]), - ), - ) - V = collect(1.0:15.0) +@testset for N in [Float32, Float64, Rational{BigInt}] + prob = IntervalAmbiguitySets(; + lower = sparse_hcat( + SparseVector(15, [4, 10], N[1 // 10, 2 // 10]), + SparseVector(15, [5, 6, 7], N[5 // 10, 3 // 10, 1 // 10]), + ), + upper = sparse_hcat( + SparseVector(15, [1, 4, 10], N[5 // 10, 6 // 10, 7 // 10]), + SparseVector(15, [5, 6, 7], N[7 // 10, 5 // 10, 3 // 10]), + ), + ) - #### Maximization - @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(Float64, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) - @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] + V = collect(N(1):N(15)) - ws = IntervalMDP.SparseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) - @test Vres ≈ N[82 // 10, 57 // 10] + #### Maximization + @testset "maximization" begin + ws = IntervalMDP.construct_workspace(prob, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = zeros(N, 2) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - ws = IntervalMDP.ThreadedSparseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) - @test Vres ≈ N[82 // 10, 57 // 10] - end + ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + 
IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - #### Minimization - @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(Float64, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) - @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] + ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] + end - ws = IntervalMDP.SparseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) - @test Vres ≈ N[37 // 10, 55 // 10] + #### Minimization + @testset "minimization" begin + ws = IntervalMDP.construct_workspace(prob, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = zeros(N, 2) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = false, + ) + @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] + + ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = false, + ) + @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] - ws = 
IntervalMDP.ThreadedSparseIntervalOMaxWorkspace(prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) - @test Vres ≈ N[37 // 10, 55 // 10] - end + ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = similar(Vres) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = false, + ) + @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] end end diff --git a/test/sparse/factored.jl b/test/sparse/factored.jl new file mode 100644 index 00000000..4781ea82 --- /dev/null +++ b/test/sparse/factored.jl @@ -0,0 +1,1115 @@ +using Revise, Test +using IntervalMDP, SparseArrays +using Random: MersenneTwister + +@testset for N in [Float32, Float64] + @testset "bellman 1d" begin + ambiguity_sets = IntervalAmbiguitySets(; + lower = sparse(N[ + 0 5//10 2//10 + 1//10 0 3//10 + 2//10 1//10 5//10 + ]), + upper = sparse(N[ + 0 7//10 3//10 + 6//10 5//10 4//10 + 7//10 3//10 5//10 + ]), + ) + imc = IntervalMarkovChain(ambiguity_sets) + + V = N[1, 2, 3] + + @testset "maximization" begin + Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent + + ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = 
IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + end + + @testset "minimization" begin + Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent + + ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + end + end + + @testset "bellman 2d" begin + state_indices = (1, 2) + action_indices = (1,) + state_vars = (2, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 0 1//15 13//30 4//15 0 + 2//5 7//30 0 11//30 2//15 1//10 + ]), + upper = sparse(N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//30 1//3 1//6 1//15 0 2//15 + 0 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ]), + upper = sparse(N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 
4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + + V = N[ + 3 13 18 + 12 16 8 + ] + eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will 
always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "bellman 2d partial dependence" begin + state_vars = (2, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 0 7//30 0 13//30 4//15 1//6 + 2//5 7//30 0 11//30 2//15 1//10 + ]), + upper = sparse(N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ]) + ), (1, 2), (1,), (2, 3), (1,)) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//30 1//3 1//6 + 4//15 1//4 1//6 + 2//15 7//30 0 + ]), + upper = sparse(N[ + 2//3 7//15 4//5 + 23//30 4//5 23//30 + 7//15 4//5 23//30 + ]) + ), (2,), (1,), (3,), (1,)) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + + V = N[ + 3 13 18 + 12 16 8 + ] + eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + 
splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 2, 3) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "bellman 3d" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ]) + ), state_indices, action_indices, 
state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 
8//15 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + V = N[ + 23, + 27, + 16, + 6, + 26, + 17, + 12, + 9, + 8, + 22, + 1, + 21, + 11, + 24, + 4, + 10, + 13, + 19, + 3, + 14, + 25, + 20, + 18, + 7, + 5, + 15, + 2, + ] + V = reshape(V, 3, 3, 3) + eval_vertices(p, q, r) = sum(V[I] * p[I[1]] * q[I[2]] * r[I[3]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "bellman 3d mixed sparse/dense" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + action_vars = (1,) + jₐ = CartesianIndex(1) + + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ]) + ), state_indices, 
action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 
8//15 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + V = N[ + 23, + 27, + 16, + 6, + 26, + 17, + 12, + 9, + 8, + 22, + 1, + 21, + 11, + 24, + 4, + 10, + 13, + 19, + 3, + 14, + 25, + 20, + 18, + 7, + 5, + 15, + 2, + ] + V = reshape(V, 3, 3, 3) + eval_vertices(p, q, r) = sum(V[I] * p[I[1]] * q[I[2]] * r[I[3]] for I in CartesianIndices(state_vars)) + + #### Maximization + @testset "maximization" begin + V_vertex = [ + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first + end + + #### Minimization + @testset "minimization" begin + V_vertex = [ + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) + ) + ) for jₛ in CartesianIndices(state_vars) + ] # The minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first = zeros(N, 3, 3, 3) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres_first, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first .>= 0.0) + @test all(Vres_first .<= maximum(V)) + @test all(Vres_first .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first + end + end + + @testset "implicit sink state" begin + @testset "first dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (2, 3, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 + ]), + upper = sparse(N[ + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 
1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 + ]), + upper = sparse(N[ + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ]), + upper = sparse(N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 
3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 
3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + @testset "second dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 2, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 
1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 
7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + 
]) + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + @testset "last dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 3, 2) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 
2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ]), + upper = sparse(N[ + 7//15 17//30 
13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = 
FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob) + V_implicit, k_implicit, res_implicit = solve(implicit_prob) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + end +end \ No newline at end of file diff --git a/test/sparse/imdp.jl b/test/sparse/imdp.jl index 9e10fa92..ae862fb7 100644 --- a/test/sparse/imdp.jl +++ b/test/sparse/imdp.jl @@ -1,416 +1,636 @@ using Revise, Test using IntervalMDP -prob1 = IntervalAmbiguitySets(; - lower = sparse([ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ]), - upper = sparse([ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ]), -) - -prob2 = IntervalAmbiguitySets(; - lower = sparse([ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ]), - upper = sparse([ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ]), -) - -prob3 = IntervalAmbiguitySets(; - lower = sparse([ - 0.0 0.0 - 0.0 0.0 - 1.0 1.0 - ]), - upper = sparse([ - 0.0 0.0 - 0.0 0.0 - 1.0 1.0 - ]), -) - -transition_probs = [prob1, prob2, prob3] -istates = [Int32(1)] - -mdp = IntervalMarkovDecisionProcess(transition_probs, istates) -@test initial_states(mdp) == istates - -mdp = IntervalMarkovDecisionProcess(transition_probs) - -@testset "explicit sink state" begin - transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) - @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) - - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = 
solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) +@testset for N in [Float32, Float64, Rational{BigInt}] + prob1 = IntervalAmbiguitySets(; + lower = sparse(N[ + 0 1//2 + 1//10 3//10 + 1//5 1//10 + ]), + upper = sparse(N[ + 1//2 7//10 + 3//5 1//2 + 7//10 3//10 + ]), + ) + + prob2 = IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//5 + 1//5 3//10 + 3//10 2//5 + ]), + upper = sparse(N[ + 3//5 3//5 + 1//2 1//2 + 2//5 2//5 + ]), + ) + + prob3 = IntervalAmbiguitySets(; + lower = sparse(N[ + 0 0 + 0 0 + 1 1 + ]), + upper = sparse(N[ + 0 0 + 0 0 + 1 1 + ]) + ) + + transition_probs = [prob1, prob2, prob3] + istates = [1] + + mdp = IntervalMarkovDecisionProcess(transition_probs, istates) + @test initial_states(mdp) == istates + + mdp = IntervalMarkovDecisionProcess(transition_probs) + + @testset "bellman" begin + V = N[1, 2, 3] + Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + + Vres = similar(Vres) + IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] end - # Finite time reach avoid - @testset "finite time reach/avoid" begin - 
prop = FiniteTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) + @testset "explicit sink state" begin + transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) + @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) + + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + end + + # Infinite time reachability + @testset 
"infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test V_conv[3] == N(1) + end + + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + # Compare exact time to finite time + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 
.>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + end + + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + @test V_conv[3] == N(1) + @test V_conv[2] == N(0) + end + + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + 
V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + # Compare exact time to finite time + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reward + @testset "finite time reward" begin + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Infinite time reward + @testset "infinite time reward" begin + prop = InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= 
N(0)) + end + + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + end end - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - end + @testset "implicit sink state" begin + transition_probs = [prob1, prob2] + implicit_mdp = IntervalMarkovDecisionProcess(transition_probs) - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = 
Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - end + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - end -end + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = 
solve(problem_implicit) -@testset "implicit sink state" begin - transition_probs = [prob1, prob2] - implicit_mdp = IntervalMarkovDecisionProcess(transition_probs) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = 
solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + # Infinite time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - # Finite time 
reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - @test V ≈ V_implicit - 
@test k == k_implicit - @test res ≈ res_implicit + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Pessimistic, 
Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, 
spec) - V, k, res = solve(problem) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + 
end + + # Finite time reward + @testset "finite time reward" begin + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + # Infinite time reward + @testset "infinite time reward" begin + prop = InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - # Expected exit time - @testset "expected exit time" begin - prop = 
ExpectedExitTime([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end end -end +end \ No newline at end of file diff --git a/test/sparse/orthogonal.jl b/test/sparse/orthogonal.jl deleted file mode 100644 index 6f88cc9e..00000000 --- a/test/sparse/orthogonal.jl +++ /dev/null @@ -1,248 +0,0 @@ -using Revise, Test -using IntervalMDP, SparseArrays -using Random: MersenneTwister - -for N in [Float32, Float64, Rational{BigInt}] - @testset "N = $N" begin - @testset "bellman 1d" begin - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; - lower = sparse(N[0 5//10; 1//10 3//10; 2//10 1//10]), - upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]), - ), - ), - (Int32(2),), - ) - - V = N[1, 2, 3] - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - 
stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] - end - - @testset "bellman 3d" begin - lower1 = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 - ] - lower2 = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 - ] - lower3 = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 - ] - - upper1 = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 
17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 - ] - upper2 = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 - ] - upper3 = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 - ] - - prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = lower1, upper = upper1), - IntervalProbabilities(; lower = lower2, upper = upper2), - IntervalProbabilities(; lower = lower3, upper = upper3), - ), - (Int32(3), Int32(3), Int32(3)), - ) - - sparse_prob = OrthogonalIntervalProbabilities( - ( - IntervalProbabilities(; lower = sparse(lower1), upper = sparse(upper1)), - IntervalProbabilities(; lower = sparse(lower2), upper = sparse(upper2)), - IntervalProbabilities(; lower = sparse(lower3), upper = sparse(upper3)), - ), - (Int32(3), Int32(3), Int32(3)), - ) - - V = N[ - 23, - 27, - 16, - 6, - 26, - 17, - 12, - 9, - 8, - 22, - 1, - 21, - 11, - 24, - 4, - 10, - 13, - 19, - 3, - 14, - 25, - 20, - 18, - 7, - 5, - 15, - 2, - ] - V = reshape(V, 3, 3, 3) - - #### Maximization - @testset "maximization" begin - ws = 
IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres_dense = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres_dense, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - - ws = IntervalMDP.construct_workspace(sparse_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(sparse_prob) - Vres = similar(Vres_dense) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - sparse_prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vres_dense - - ws = IntervalMDP.SparseOrthogonalWorkspace(sparse_prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(sparse_prob) - Vres = similar(Vres_dense) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - sparse_prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vres_dense - - ws = IntervalMDP.ThreadedSparseOrthogonalWorkspace(sparse_prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(sparse_prob) - Vres = similar(Vres_dense) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - sparse_prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ Vres_dense - end - - #### Minimization - @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres_dense = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres_dense, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - - ws = IntervalMDP.construct_workspace(sparse_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(sparse_prob) - Vres = similar(Vres_dense) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - sparse_prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vres_dense - - ws = IntervalMDP.SparseOrthogonalWorkspace(sparse_prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(sparse_prob) - Vres = similar(Vres_dense) 
- IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - sparse_prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vres_dense - - ws = IntervalMDP.ThreadedSparseOrthogonalWorkspace(sparse_prob, 1) - strategy_cache = IntervalMDP.construct_strategy_cache(sparse_prob) - Vres = similar(Vres_dense) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - sparse_prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ Vres_dense - end - end - end -end diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index da4c6b6c..9cfb2af8 100644 --- a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -4,7 +4,7 @@ test_files = [ "vi.jl", "imdp.jl", "synthesis.jl", - # "orthogonal.jl" + "factored.jl" ] for f in test_files diff --git a/test/sparse/vi.jl b/test/sparse/vi.jl index 2182895d..e924044c 100644 --- a/test/sparse/vi.jl +++ b/test/sparse/vi.jl @@ -1,109 +1,130 @@ using Revise, Test using IntervalMDP, SparseArrays -prob = IntervalAmbiguitySets(; - lower = sparse_hcat( - SparseVector(3, [2, 3], [0.1, 0.2]), - SparseVector(3, [1, 2, 3], [0.5, 0.3, 0.1]), - SparseVector(3, [3], [1.0]), - ), - upper = sparse_hcat( - SparseVector(3, [1, 2, 3], [0.5, 0.6, 0.7]), - SparseVector(3, [1, 2, 3], [0.7, 0.5, 0.3]), - SparseVector(3, [3], [1.0]), - ), -) - -mc = IntervalMarkovChain(prob, [1]) - -prop = FiniteTimeReachability([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReachability([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachability([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = 
FiniteTimeReachAvoid([3], [2], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReachAvoid([3], [2], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachAvoid([3], [2], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeSafety([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeSafety([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = InfiniteTimeSafety([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReward([2.0, 1.0, -1.0], 0.9, 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = 
InfiniteTimeReward([2.0, 1.0, -1.0], 0.9, 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 +using Revise, Test +using IntervalMDP + +@testset for N in [Float32, Float64, Rational{BigInt}] + prob = IntervalAmbiguitySets(; + lower = sparse_hcat( + SparseVector(3, [2, 3], N[1//10, 1//5]), + SparseVector(3, [1, 2, 3], N[1//2, 3//10, 1//10]), + SparseVector(3, [3], N[1//1]), + ), + upper = sparse_hcat( + SparseVector(3, [1, 2, 3], N[1//2, 3//5, 7//10]), + SparseVector(3, [1, 2, 3], N[7//10, 1//2, 3//10]), + SparseVector(3, [3], N[1//1]), + ), + ) + + mc = IntervalMarkovChain(prob, [1]) + @test initial_states(mc) == [1] + + mc = IntervalMarkovChain(prob) + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + sol = solve(problem) + V_fixed_it, k, res = sol + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + @test value_function(sol) == V_fixed_it + @test num_iterations(sol) == k + @test residual(sol) == res + + prop = FiniteTimeReachability([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 11 + @test all(V_fixed_it .<= V_fixed_it2) + + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + prop = FiniteTimeReachAvoid([3], [2], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + 
V_fixed_it2, k, _ = solve(problem) + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it .<= V_fixed_it2) + + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeSafety([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + prop = FiniteTimeSafety([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it2 .<= V_fixed_it) + + prop = InfiniteTimeSafety([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it .>= N(0)) + + prop = FiniteTimeReward(N[2, 1, -1], N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, _ = solve(problem) + @test k == 10 + @test all(V_fixed_it2 .<= V_fixed_it) + + prop = InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + + prop = InfiniteTimeReward(N[2, 1, -1], N(9//10), N(1//1_000_000)) + 
spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + @test maximum(u) <= N(1//1_000_000) +end \ No newline at end of file From 5fdc8f3b7aa94abd096d9cbd19d91cf8f088ec71 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 11 Sep 2025 16:11:10 +0200 Subject: [PATCH 10/71] Add recursive OMax for fIMDPs --- src/bellman.jl | 556 ++----- .../FactoredRobustMarkovDecisionProcess.jl | 19 +- src/workspace.jl | 64 +- test/base/factored.jl | 1449 +++++++++++------ test/sparse/factored.jl | 787 +++++++-- 5 files changed, 1877 insertions(+), 998 deletions(-) diff --git a/src/bellman.jl b/src/bellman.jl index dd02b1c6..4f6636e4 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -319,7 +319,7 @@ bellman_precomputation!( ) = nothing function state_bellman!( - workspace::IMDPWorkspace, + workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, strategy_cache::OptimizingStrategyCache, Vres, V, @@ -340,7 +340,7 @@ function state_bellman!( end function state_bellman!( - workspace::IMDPWorkspace, + workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, strategy_cache::NonOptimizingStrategyCache, Vres, V, @@ -364,11 +364,7 @@ Base.@propagate_inbounds function state_action_bellman( budget, upper_bound, ) - return dense_sorted_state_action_bellman(V, ambiguity_set, budget, permutation(workspace)) -end - -Base.@propagate_inbounds function dense_sorted_state_action_bellman(V, ambiguity_set, budget, perm) - return dot(V, lower(ambiguity_set)) + gap_value(V, gap(ambiguity_set), budget, perm) + return dot(V, lower(ambiguity_set)) + gap_value(V, gap(ambiguity_set), budget, permutation(workspace)) end Base.@propagate_inbounds function gap_value( @@ -414,7 +410,7 @@ end Base.@propagate_inbounds function gap_value( Vp::VP, budget, -) where {T, VP <: AbstractVector{<:Tuple{T, <:Real}}} +) where {T <: Real, VP <: AbstractVector{<:Tuple{T, T}}} res = zero(T) for (V, p) in Vp @@ 
-599,389 +595,161 @@ function mccormick_branch(model, ambiguity_sets) end -# ################################################################ -# # Bellman operator for OrthogonalIntervalMarkovDecisionProcess # -# ################################################################ -# function _bellman_helper!( -# workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, -# strategy_cache::AbstractStrategyCache, -# Vres, -# V, -# prob, -# stateptr; -# upper_bound = false, -# maximize = true, -# ) -# bellman_precomputation!(workspace, V, prob, upper_bound) - -# # For each source state -# @inbounds for (jₛ_cart, jₛ_linear) in zip( -# CartesianIndices(source_shape(prob)), -# LinearIndices(source_shape(prob)), -# ) -# state_bellman!( -# workspace, -# strategy_cache, -# Vres, -# V, -# prob, -# stateptr, -# jₛ_cart, -# jₛ_linear; -# upper_bound = upper_bound, -# maximize = maximize, -# ) -# end - -# return Vres -# end - -# function _bellman_helper!( -# workspace::Union{ -# ThreadedDenseOrthogonalWorkspace, -# ThreadedSparseOrthogonalWorkspace, -# ThreadedMixtureWorkspace, -# }, -# strategy_cache::AbstractStrategyCache, -# Vres, -# V, -# prob, -# stateptr; -# upper_bound = false, -# maximize = true, -# ) -# bellman_precomputation!(workspace, V, prob, upper_bound) - -# # For each source state -# I_linear = LinearIndices(source_shape(prob)) -# @threadstid tid for jₛ_cart in CartesianIndices(source_shape(prob)) -# # We can't use @threadstid over a zip, so we need to manually index -# jₛ_linear = I_linear[jₛ_cart] - -# ws = workspace[tid] - -# state_bellman!( -# ws, -# strategy_cache, -# Vres, -# V, -# prob, -# stateptr, -# jₛ_cart, -# jₛ_linear; -# upper_bound = upper_bound, -# maximize = maximize, -# ) -# end - -# return Vres -# end - -# function bellman_precomputation!(workspace::DenseOrthogonalWorkspace, V, prob, upper_bound) -# # Since sorting for the first level is shared among all higher levels, we can precompute it -# product_nstates = 
num_target(prob) - -# # For each higher-level state in the product space -# for I in CartesianIndices(product_nstates[2:end]) -# sort_dense_orthogonal(workspace, V, I, upper_bound) -# end -# end - -# function bellman_precomputation!( -# workspace::ThreadedDenseOrthogonalWorkspace, -# V, -# prob, -# upper_bound, -# ) -# # Since sorting for the first level is shared among all higher levels, we can precompute it -# product_nstates = num_target(prob) - -# # For each higher-level state in the product space -# @threadstid tid for I in CartesianIndices(product_nstates[2:end]) -# ws = workspace[tid] -# sort_dense_orthogonal(ws, V, I, upper_bound) -# end -# end - -# bellman_precomputation!( -# workspace::Union{SparseOrthogonalWorkspace, ThreadedSparseOrthogonalWorkspace}, -# V, -# prob, -# upper_bound, -# ) = nothing - -# function sort_dense_orthogonal(workspace, V, I, upper_bound) -# @inbounds begin -# perm = @view workspace.permutation[axes(V, 1)] -# sortperm!(perm, @view(V[:, I]); rev = upper_bound, scratch = workspace.scratch) - -# copyto!(@view(first_level_perm(workspace)[:, I]), perm) -# end -# end - -# function state_bellman!( -# workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, -# strategy_cache::OptimizingStrategyCache, -# Vres, -# V, -# prob, -# stateptr, -# jₛ_cart, -# jₛ_linear; -# upper_bound, -# maximize, -# ) -# @inbounds begin -# s₁, s₂ = stateptr[jₛ_linear], stateptr[jₛ_linear + 1] -# act_vals = @view actions(workspace)[1:(s₂ - s₁)] - -# for (i, jₐ) in enumerate(s₁:(s₂ - 1)) -# act_vals[i] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) -# end - -# Vres[jₛ_cart] = extract_strategy!(strategy_cache, act_vals, V, jₛ_cart, maximize) -# end -# end - -# function state_bellman!( -# workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace}, -# strategy_cache::NonOptimizingStrategyCache, -# Vres, -# V, -# prob, -# stateptr, -# jₛ_cart, -# jₛ_linear; -# upper_bound, -# maximize, -# ) -# 
@inbounds begin -# s₁ = stateptr[jₛ_linear] -# jₐ = s₁ + strategy_cache[jₛ_cart] - 1 -# Vres[jₛ_cart] = state_action_bellman(workspace, V, prob, jₐ, upper_bound) -# end -# end - -# Base.@propagate_inbounds function state_action_bellman( -# workspace::DenseOrthogonalWorkspace, -# V, -# prob, -# jₐ, -# upper_bound, -# ) -# # The only dimension -# if ndims(prob) == 1 -# return dense_sorted_state_action_bellman( -# V, -# prob[1], -# jₐ, -# first_level_perm(workspace), -# ) -# end - -# Vₑ = workspace.expectation_cache -# product_nstates = num_target(prob) - -# # For each higher-level state in the product space -# for I in CartesianIndices(product_nstates[2:end]) - -# # For the first dimension, we need to copy the values from V -# v = dense_sorted_state_action_bellman( -# @view(V[:, I]), -# prob[1], -# jₐ, -# # Use shared first level permutation across threads -# @view(first_level_perm(workspace)[:, I]), -# ) -# Vₑ[1][I[1]] = v - -# # For the remaining dimensions, if "full", compute expectation and store in the next level -# for d in 2:(ndims(prob) - 1) -# if I[d - 1] == product_nstates[d] -# v = orthogonal_inner_bellman!( -# workspace, -# Vₑ[d - 1], -# prob[d], -# jₐ, -# upper_bound, -# ) -# Vₑ[d][I[d]] = v -# else -# break -# end -# end -# end - -# # Last dimension -# v = orthogonal_inner_bellman!(workspace, Vₑ[end], prob[end], jₐ, upper_bound) - -# return v -# end - -# Base.@propagate_inbounds function orthogonal_inner_bellman!( -# workspace, -# V, -# prob, -# jₐ, -# upper_bound::Bool, -# ) -# perm = @view permutation(workspace)[1:length(V)] - -# # rev=true for upper bound -# sortperm!(perm, V; rev = upper_bound, scratch = scratch(workspace)) - -# return dense_sorted_state_action_bellman(V, prob, jₐ, perm) -# end - -# Base.@propagate_inbounds function state_action_bellman( -# workspace::SparseOrthogonalWorkspace, -# V, -# prob, -# jₐ, -# upper_bound, -# ) -# # This function uses ntuple excessively to avoid allocations (list comprehension requires allocation, while 
ntuple does not) -# nzinds_first = SparseArrays.nonzeroinds(gap(prob, 1, :, jₐ)) -# nzinds_per_prob = -# ntuple(i -> SparseArrays.nonzeroinds(gap(prob, i + 1, :, jₐ)), ndims(prob) - 1) - -# lower_nzvals_per_prob = ntuple(i -> nonzeros(lower(prob, i, :, jₐ)), ndims(prob)) -# gap_nzvals_per_prob = ntuple(i -> nonzeros(gap(prob, i, :, jₐ)), ndims(prob)) -# sum_lower_per_prob = ntuple(i -> sum_lower(prob, i, jₐ), ndims(prob)) - -# nnz_per_prob = ntuple(i -> nnz(gap(prob, i, :, jₐ)), ndims(prob)) -# Vₑ = ntuple( -# i -> @view(workspace.expectation_cache[i][1:nnz_per_prob[i + 1]]), -# ndims(prob) - 1, -# ) - -# if ndims(prob) == 1 -# # The only dimension -# return orthogonal_sparse_inner_bellman!( -# workspace, -# @view(V[nzinds_first]), -# lower_nzvals_per_prob[end], -# gap_nzvals_per_prob[end], -# sum_lower_per_prob[end], -# upper_bound, -# ) -# end - -# # For each higher-level state in the product space -# for I in CartesianIndices(nnz_per_prob[2:end]) -# Isparse = CartesianIndex(ntuple(d -> nzinds_per_prob[d][I[d]], ndims(prob) - 1)) - -# # For the first dimension, we need to copy the values from V -# v = orthogonal_sparse_inner_bellman!( -# workspace, -# @view(V[nzinds_first, Isparse]), -# lower_nzvals_per_prob[1], -# gap_nzvals_per_prob[1], -# sum_lower_per_prob[1], -# upper_bound, -# ) -# Vₑ[1][I[1]] = v - -# # For the remaining dimensions, if "full", compute expectation and store in the next level -# for d in 2:(ndims(prob) - 1) -# if I[d - 1] == nnz_per_prob[d] -# v = orthogonal_sparse_inner_bellman!( -# workspace, -# Vₑ[d - 1], -# lower_nzvals_per_prob[d], -# gap_nzvals_per_prob[d], -# sum_lower_per_prob[d], -# upper_bound, -# ) -# Vₑ[d][I[d]] = v -# else -# break -# end -# end -# end - -# # Last dimension -# v = orthogonal_sparse_inner_bellman!( -# workspace, -# Vₑ[end], -# lower_nzvals_per_prob[end], -# gap_nzvals_per_prob[end], -# sum_lower_per_prob[end], -# upper_bound, -# ) - -# return v -# end - -# Base.@propagate_inbounds function 
orthogonal_sparse_inner_bellman!( -# workspace::SparseOrthogonalWorkspace, -# V, -# lower, -# gap, -# sum_lower, -# upper_bound::Bool, -# ) -# Vp_workspace = @view workspace.values_gaps[1:length(gap)] -# for (i, (v, p)) in enumerate(zip(V, gap)) -# Vp_workspace[i] = (v, p) -# end - -# # rev=true for upper bound -# sort!(Vp_workspace; rev = upper_bound, scratch = scratch(workspace)) - -# return dot(V, lower) + gap_value(Vp_workspace, sum_lower) -# end - -# ############################################################# -# # Bellman operator for MixtureIntervalMarkovDecisionProcess # -# ############################################################# -# bellman_precomputation!(workspace::MixtureWorkspace, V, prob, upper_bound) = -# bellman_precomputation!(workspace.orthogonal_workspace, V, prob, upper_bound) - -# function bellman_precomputation!( -# workspace::ThreadedMixtureWorkspace{<:DenseOrthogonalWorkspace}, -# V, -# prob, -# upper_bound, -# ) -# # Since sorting for the first level is shared among all higher levels, we can precompute it -# product_nstates = num_target(prob) - -# # For each higher-level state in the product space -# @threadstid tid for I in CartesianIndices(product_nstates[2:end]) -# ws = workspace[tid] -# sort_dense_orthogonal(ws.orthogonal_workspace, V, I, upper_bound) -# end -# end - -# bellman_precomputation!( -# workspace::ThreadedMixtureWorkspace{<:SparseOrthogonalWorkspace}, -# V, -# prob, -# upper_bound, -# ) = nothing - -# Base.@propagate_inbounds function state_action_bellman( -# workspace::MixtureWorkspace, -# V, -# prob, -# jₐ, -# upper_bound, -# ) -# # Value iteration for each model in the mixture (for source-action pair jₐ) -# for (k, p) in enumerate(prob) -# v = state_action_bellman(workspace.orthogonal_workspace, V, p, jₐ, upper_bound) -# workspace.mixture_cache[k] = v -# end - -# # Combine mixture with weighting probabilities -# v = orthogonal_inner_bellman!( -# workspace, -# workspace.mixture_cache, -# weighting_probs(prob), -# jₐ, 
-# upper_bound, -# ) - -# return v -# end +################################################## +# O-Max-based Bellman operator for Factored IMDP # +################################################## +function _bellman_helper!( + workspace::FactoredIntervalOMaxWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + model; + upper_bound = false, + maximize = true, +) + # For each source state + @inbounds for jₛ in CartesianIndices(source_shape(model)) + state_bellman!( + workspace, + strategy_cache, + Vres, + V, + model, + jₛ; + upper_bound = upper_bound, + maximize = maximize, + ) + end + + return Vres +end + +function _bellman_helper!( + workspace::ThreadedFactoredIntervalOMaxWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + model; + upper_bound = false, + maximize = true, +) + # For each source state + @threadstid tid for jₛ in CartesianIndices(source_shape(model)) + ws = workspace[tid] + + state_bellman!( + ws, + strategy_cache, + Vres, + V, + model, + jₛ; + upper_bound = upper_bound, + maximize = maximize, + ) + end + + return Vres +end + +function state_bellman!( + workspace::FactoredIntervalOMaxWorkspace, + strategy_cache::OptimizingStrategyCache, + Vres, + V, + model::FactoredRMDP{N}, + jₛ; + upper_bound, + maximize, +) where {N} + @inbounds begin + for jₐ in CartesianIndices(action_shape(model)) + ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) + budgets = ntuple(r -> workspace.budgets[r][sub2ind(marginals(model)[r], jₐ, jₛ)], N) + workspace.actions[jₐ] = state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) + end + + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + end +end + +function state_bellman!( + workspace::FactoredIntervalOMaxWorkspace, + strategy_cache::NonOptimizingStrategyCache, + Vres, + V, + model::FactoredRMDP{N}, + jₛ; + upper_bound, + maximize, +) where {N} + @inbounds begin + jₐ = CartesianIndex(strategy_cache[jₛ]) + ambiguity_sets = 
getindex.(marginals(model), jₐ, jₛ) + budgets = ntuple(r -> workspace.budgets[r][sub2ind(marginals(model)[r], jₐ, jₛ)], N) + Vres[jₛ] = state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) + end +end + +Base.@propagate_inbounds function state_action_bellman( + workspace::FactoredIntervalOMaxWorkspace, + V, + model, + ambiguity_sets, + budgets, + upper_bound, +) + Vₑ = workspace.expectation_cache + + # For each higher-level state in the product space + for I in CartesianIndices(state_variables(model)[2:end]) + # For the first dimension, we need to copy the values from V + v = orthogonal_inner_bellman!( + workspace, + @view(V[:, I]), + ambiguity_sets[1], + budgets[1], + upper_bound + ) + Vₑ[1][I[1]] = v + + # For the remaining dimensions, if "full", compute expectation and store in the next level + for d in 2:(length(ambiguity_sets) - 1) + if I[d - 1] == state_variables(model, d) + v = orthogonal_inner_bellman!( + workspace, + Vₑ[d - 1], + ambiguity_sets[d], + budgets[d], + upper_bound, + ) + Vₑ[d][I[d]] = v + else + break + end + end + end + + # Last dimension + v = orthogonal_inner_bellman!(workspace, Vₑ[end], ambiguity_sets[end], budgets[end], upper_bound) + + return v +end + +Base.@propagate_inbounds function orthogonal_inner_bellman!( + workspace, + V, + ambiguity_set, + budget, + upper_bound::Bool, +) + Vp_workspace = @view workspace.values_gaps[1:length(support(ambiguity_set))] + @inbounds for (i, j) in enumerate(support(ambiguity_set)) + Vp_workspace[i] = (V[j], gap(ambiguity_set, j)) + end + + # rev=true for upper bound + sort!(Vp_workspace; rev = upper_bound, by = first, scratch = scratch(workspace)) + + return dot(V, lower(ambiguity_set)) + gap_value(Vp_workspace, budget) +end diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index 5693dee3..d1ead9b8 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ 
b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -108,15 +108,16 @@ function check_initial_states(state_vars, initial_states) end end -state_variables(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.state_vars -action_variables(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.action_vars -num_states(rmdp::FactoredRobustMarkovDecisionProcess) = prod(state_variables(rmdp)) -num_actions(rmdp::FactoredRobustMarkovDecisionProcess) = prod(action_variables(rmdp)) -marginals(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.transition -initial_states(rmdp::FactoredRobustMarkovDecisionProcess) = rmdp.initial_states - -source_shape(m::FactoredRobustMarkovDecisionProcess) = m.source_dims -action_shape(m::FactoredRobustMarkovDecisionProcess) = m.action_vars +state_variables(rmdp::FactoredRMDP) = rmdp.state_vars +state_variables(rmdp::FactoredRMDP, r) = rmdp.state_vars[r] +action_variables(rmdp::FactoredRMDP) = rmdp.action_vars +num_states(rmdp::FactoredRMDP) = prod(state_variables(rmdp)) +num_actions(rmdp::FactoredRMDP) = prod(action_variables(rmdp)) +marginals(rmdp::FactoredRMDP) = rmdp.transition +initial_states(rmdp::FactoredRMDP) = rmdp.initial_states + +source_shape(m::FactoredRMDP) = m.source_dims +action_shape(m::FactoredRMDP) = m.action_vars function Base.getindex(rmdp::FactoredRMDP, r) return rmdp.transition[r] diff --git a/src/workspace.jl b/src/workspace.jl index fb55e2f7..5e24d5a6 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -26,8 +26,6 @@ end construct_workspace(mdp::FactoredRMDP, bellman_alg; kwargs...) = construct_workspace(mdp, modeltype(mdp), bellman_alg; kwargs...) 
-abstract type IMDPWorkspace end - function construct_workspace( sys::FactoredRMDP, ::IsIMDP, @@ -40,7 +38,7 @@ function construct_workspace( end # Dense -struct DenseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace +struct DenseIntervalOMaxWorkspace{T <: Real} budget::Vector{T} scratch::Vector{Int32} permutation::Vector{Int32} @@ -58,7 +56,7 @@ end permutation(ws::DenseIntervalOMaxWorkspace) = ws.permutation scratch(ws::DenseIntervalOMaxWorkspace) = ws.scratch -struct ThreadedDenseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace +struct ThreadedDenseIntervalOMaxWorkspace{T <: Real} thread_workspaces::Vector{DenseIntervalOMaxWorkspace{T}} end @@ -93,7 +91,7 @@ function construct_workspace( end # Sparse -struct SparseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace +struct SparseIntervalOMaxWorkspace{T <: Real} budget::Vector{T} scratch::Vector{Tuple{T, T}} values_gaps::Vector{Tuple{T, T}} @@ -112,7 +110,7 @@ end scratch(ws::SparseIntervalOMaxWorkspace) = ws.scratch -struct ThreadedSparseIntervalOMaxWorkspace{T <: Real} <: IMDPWorkspace +struct ThreadedSparseIntervalOMaxWorkspace{T <: Real} thread_workspaces::Vector{SparseIntervalOMaxWorkspace{T}} end @@ -154,7 +152,7 @@ function FactoredIntervalMcCormickWorkspace(sys, alg) return FactoredIntervalMcCormickWorkspace(model, actions) end -struct ThreadedFactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: AbstractArray{T}} <: IMDPWorkspace +struct ThreadedFactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: AbstractArray{T}} thread_workspaces::Vector{FactoredIntervalMcCormickWorkspace{M, T, AT}} end @@ -177,4 +175,56 @@ function construct_workspace( else return ThreadedFactoredIntervalMcCormickWorkspace(sys, alg) end +end + +# Factored interval o-max workspace +struct FactoredIntervalOMaxWorkspace{N, M, T <: Real, AT <: AbstractArray{T}} + expectation_cache::NTuple{M, Vector{T}} + values_gaps::Vector{Tuple{T, T}} + scratch::Vector{Tuple{T, T}} + budgets::NTuple{N, Vector{T}} + 
actions::AT +end + +function FactoredIntervalOMaxWorkspace(sys::FactoredRMDP) + N = length(marginals(sys)) + R = valuetype(sys) + + max_support_per_marginal = Tuple(maximum(map(length ∘ support, ambiguity_sets(marginal))) for marginal in marginals(sys)) + max_support = maximum(max_support_per_marginal) + + expectation_cache = NTuple{N - 1, Vector{R}}(Vector{R}(undef, n) for n in max_support_per_marginal[2:end]) + values_gaps = Vector{Tuple{R, R}}(undef, max_support) + scratch = Vector{Tuple{R, R}}(undef, max_support) + + budgets = ntuple(r -> one(R) .- vec(sum(ambiguity_sets(sys[r]).lower; dims = 1)), N) + actions = Array{R}(undef, action_shape(sys)) + + return FactoredIntervalOMaxWorkspace(expectation_cache, values_gaps, scratch, budgets, actions) +end +scratch(ws::FactoredIntervalOMaxWorkspace) = ws.scratch + +struct ThreadedFactoredIntervalOMaxWorkspace{N, M, T <: Real, AT <: AbstractArray{T}} + thread_workspaces::Vector{FactoredIntervalOMaxWorkspace{N, M, T, AT}} +end + +function ThreadedFactoredIntervalOMaxWorkspace(sys::FactoredRMDP) + nthreads = Threads.nthreads() + thread_workspaces = [FactoredIntervalOMaxWorkspace(sys) for _ in 1:nthreads] + return ThreadedFactoredIntervalOMaxWorkspace(thread_workspaces) +end +Base.getindex(ws::ThreadedFactoredIntervalOMaxWorkspace, i) = ws.thread_workspaces[i] + +function construct_workspace( + sys::FactoredRMDP, + ::IsFIMDP, + ::OMaximization; + threshold = 10, + kwargs... 
+) + if Threads.nthreads() == 1 || num_states(sys) <= threshold + return FactoredIntervalOMaxWorkspace(sys) + else + return ThreadedFactoredIntervalOMaxWorkspace(sys) + end end \ No newline at end of file diff --git a/test/base/factored.jl b/test/base/factored.jl index b821953c..cc0afbd4 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -160,24 +160,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) + Vres_first_McCormick = zeros(N, 2, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, ) epsilon = N == Float32 ? 1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -186,11 +186,11 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -199,7 +199,50 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + 
strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax end #### Minimization @@ -216,24 +259,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) + Vres_first_McCormick = zeros(N, 2, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -242,11 +285,11 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -255,14 +298,56 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax end end @testset "bellman 2d partial dependence" begin state_vars = (2, 3) - action_vars = (1,) - jₐ = CartesianIndex(1) + action_vars = (1, 2) marginal1 = Marginal(IntervalAmbiguitySets(; lower = N[ @@ -277,16 +362,16 @@ using Random: MersenneTwister marginal2 = Marginal(IntervalAmbiguitySets(; lower = N[ - 1//30 1//3 1//6 - 4//15 1//4 1//6 - 2//15 7//30 1//10 + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 ], upper = N[ - 2//3 7//15 4//5 - 23//30 4//5 23//30 - 7//15 4//5 23//30 + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 ] - ), (2,), (1,), (3,), (1,)) + ), (2,), (2,), (3,), (2,)) mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) @@ -297,37 +382,40 @@ using Random: MersenneTwister eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) #### Maximization - @testset "maximization" begin + @testset "max/max" begin V_vertex = [ maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) + maximum( + splat(eval_vertices), + Iterators.product( + 
IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) ) for jₛ in CartesianIndices(state_vars) - ] # The maximum will always be a vertex + ] # The (inner) maximum will always be a vertex ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) + Vres_first_McCormick = zeros(N, 2, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, + maximize = true, ) epsilon = N == Float32 ? 1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -335,12 +423,13 @@ using Random: MersenneTwister V, mdp; upper_bound = true, + maximize = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -348,42 +437,198 @@ using Random: MersenneTwister V, mdp; upper_bound = true, + maximize = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound 
= true, + maximize = true, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax + end + + @testset "min/max" begin + V_vertex = [ + minimum( + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) + ) for jₛ in CartesianIndices(state_vars) + ] # The (inner) maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_McCormick = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_McCormick, + V, + mdp; + upper_bound = true, + maximize = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + maximize = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax end #### Minimization - @testset "minimization" begin + @testset "min/min" begin V_vertex = [ minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ] # The (inner) minimum will always be a vertex ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) - IntervalMDP._bellman_helper!( + Vres_first_McCormick = zeros(N, 2, 3) + IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, + maximize = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -391,12 +636,13 @@ using Random: MersenneTwister V, mdp; upper_bound = false, + maximize = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -404,8 +650,161 @@ using Random: MersenneTwister V, mdp; upper_bound = false, + maximize = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + maximize = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax + end + + @testset "max/min" begin + V_vertex = [ + maximum( + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) + ) for jₛ in CartesianIndices(state_vars) + ] # The (inner) minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_McCormick = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_McCormick, + V, + mdp; + upper_bound = false, + maximize = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + maximize = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax end end @@ -504,24 +903,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) + Vres_first_McCormick = zeros(N, 3, 3, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -530,11 +929,41 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) IntervalMDP.bellman!( ws, strategy_cache, @@ -543,7 +972,20 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax end #### Minimization @@ -561,24 +1003,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( + Vres_first_McCormick = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -587,11 +1029,11 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -600,444 +1042,489 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first - end - end - - @testset "implicit sink state" begin - @testset "first dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (2, 3, 3) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 - 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 - 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 - ], - upper = N[ - 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 - 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 - 11//30 1//3 1 2//5 8//15 1 
7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 - 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 - 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 - ], - upper = N[ - 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 - 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 - 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 - 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 - 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 - ], - upper = N[ - 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 - 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 - 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, 
(marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 
17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) + @test Vres ≈ Vres_first_McCormick - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + ) - @testset "second dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (3, 2, 3) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 
11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 - ] - ), state_indices, 
action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 
3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax end + end - @testset "last dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (3, 3, 2) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 
1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 - 
1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 
2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) + @testset for alg in [RobustValueIteration(LPMcCormickRelaxation()), RobustValueIteration(OMaximization())] + @testset "implicit sink state" begin + @testset "first dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (2, 3, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 
0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 + ], + upper = N[ + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 + ], + upper = N[ + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ], + upper = N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 
2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, 
action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob, alg) + V_implicit, k_implicit, res_implicit = solve(implicit_prob, alg) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) + @testset "second dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 2, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 
1//3 4//15 3//10 1//30 3//10 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 
7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = 
N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob, alg) + V_implicit, k_implicit, res_implicit = solve(implicit_prob, alg) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + @testset "last dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 3, 2) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 
11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 
3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ] + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ] + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 
4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ] + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob, alg) + V_implicit, k_implicit, res_implicit = solve(implicit_prob, alg) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end end - end - # 4-D abstraction - @testset "4D abstraction" begin - rng = MersenneTwister(995) + # 4-D abstraction + @testset "4D abstraction" begin + rng = MersenneTwister(995) - prob_lower = [rand(rng, N, 3, 81) ./ N(3) for _ in 1:4] - prob_upper = [(rand(rng, N, 3, 81) .+ N(1)) ./ N(3) for _ in 1:4] + prob_lower = [rand(rng, N, 3, 81) ./ N(3) for _ in 1:4] + prob_upper = [(rand(rng, N, 3, 81) .+ N(1)) ./ N(3) for _ in 1:4] - ambiguity_sets = ntuple( - i -> IntervalAmbiguitySets(; - lower = prob_lower[i], - upper = prob_upper[i], - ), - 4, - ) + ambiguity_sets = ntuple( + i -> IntervalAmbiguitySets(; + lower = prob_lower[i], + upper = prob_upper[i], + ), + 4, + ) - marginals = ntuple( - i -> Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 3, 3), (1,)), - 4, - ) + marginals = ntuple( + i -> Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 
3, 3), (1,)), + 4, + ) - mdp = FactoredRobustMarkovDecisionProcess((3, 3, 3, 3), (1,), marginals) + mdp = FactoredRobustMarkovDecisionProcess((3, 3, 3, 3), (1,), marginals) - prop = FiniteTimeReachability([(3, 3, 3, 3)], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) + prop = FiniteTimeReachability([(3, 3, 3, 3)], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) - V_ortho, it_ortho, res_ortho = solve(prob) + V_ortho, it_ortho, res_ortho = solve(prob, alg) - @test V_ortho[3, 3, 3, 3] ≈ one(N) - @test all(V_ortho .>= zero(N)) - @test all(V_ortho .<= one(N)) + @test V_ortho[3, 3, 3, 3] ≈ one(N) + @test all(V_ortho .>= zero(N)) + @test all(V_ortho .<= one(N)) - # Test against the naive construction - prob_lower_simple = zeros(N, 81, 81) - prob_upper_simple = zeros(N, 81, 81) + # Test against the naive construction + prob_lower_simple = zeros(N, 81, 81) + prob_upper_simple = zeros(N, 81, 81) - lin = LinearIndices((3, 3, 3, 3)) - act_idx = CartesianIndex(1) - for I in CartesianIndices((3, 3, 3, 3)) - for J in CartesianIndices((3, 3, 3, 3)) - marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) + lin = LinearIndices((3, 3, 3, 3)) + act_idx = CartesianIndex(1) + for I in CartesianIndices((3, 3, 3, 3)) + for J in CartesianIndices((3, 3, 3, 3)) + marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) - prob_lower_simple[lin[J], lin[I]] = prod( - lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) + prob_lower_simple[lin[J], lin[I]] = prod( + lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) - prob_upper_simple[lin[J], lin[I]] = prod( - upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) + prob_upper_simple[lin[J], lin[I]] = prod( + upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) + end end - end - ambiguity_set = IntervalAmbiguitySets(; - lower = prob_lower_simple, - upper = prob_upper_simple, - ) + ambiguity_set = 
IntervalAmbiguitySets(; + lower = prob_lower_simple, + upper = prob_upper_simple, + ) - imc = IntervalMarkovChain(ambiguity_set) + imc = IntervalMarkovChain(ambiguity_set) - prop = FiniteTimeReachability([81], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(imc, spec) + prop = FiniteTimeReachability([81], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(imc, spec) - V_direct, it_direct, res_direct = solve(prob) - @test V_direct[81] ≈ one(N) - @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) - end + V_direct, it_direct, res_direct = solve(prob, alg) + @test V_direct[81] ≈ one(N) + @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) + end - @testset "synthesis" begin - rng = MersenneTwister(3286) + @testset "synthesis" begin + rng = MersenneTwister(3286) - num_states_per_axis = 3 - num_axis = 3 - num_states = num_states_per_axis^num_axis - num_actions = 2 - num_choices = num_states * num_actions - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = ntuple(_ -> num_states_per_axis, num_axis) - action_vars = (num_actions,) + num_states_per_axis = 3 + num_axis = 3 + num_states = num_states_per_axis^num_axis + num_actions = 2 + num_choices = num_states * num_actions + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = ntuple(_ -> num_states_per_axis, num_axis) + action_vars = (num_actions,) - prob_lower = [ - rand(rng, N, num_states_per_axis, num_choices) ./ num_states_per_axis - for _ in 1:num_axis - ] - prob_upper = [ - (rand(rng, N, num_states_per_axis, num_choices) .+ N(1)) ./ - num_states_per_axis for _ in 1:num_axis - ] + prob_lower = [ + rand(rng, N, num_states_per_axis, num_choices) ./ num_states_per_axis + for _ in 1:num_axis + ] + prob_upper = [ + (rand(rng, N, num_states_per_axis, num_choices) .+ N(1)) ./ + num_states_per_axis for _ in 1:num_axis + ] - ambiguity_sets = ntuple( - i -> IntervalAmbiguitySets(; - lower = prob_lower[i], - upper = prob_upper[i], - 
), - num_axis, - ) + ambiguity_sets = ntuple( + i -> IntervalAmbiguitySets(; + lower = prob_lower[i], + upper = prob_upper[i], + ), + num_axis, + ) - marginals = ntuple( - i -> Marginal(ambiguity_sets[i], state_indices, action_indices, state_vars, action_vars), - num_axis, - ) + marginals = ntuple( + i -> Marginal(ambiguity_sets[i], state_indices, action_indices, state_vars, action_vars), + num_axis, + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, marginals) + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, marginals) - prop = FiniteTimeReachability( - [(num_states_per_axis, num_states_per_axis, num_states_per_axis)], - 10, - ) - spec = Specification(prop, Pessimistic, Maximize) - prob = ControlSynthesisProblem(mdp, spec) + prop = FiniteTimeReachability( + [(num_states_per_axis, num_states_per_axis, num_states_per_axis)], + 10, + ) + spec = Specification(prop, Pessimistic, Maximize) + prob = ControlSynthesisProblem(mdp, spec) - policy, V, it, res = solve(prob) - @test it == 10 - @test all(V .≥ 0.0) + policy, V, it, res = solve(prob, alg) + @test it == 10 + @test all(V .≥ 0.0) - # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP - prob = VerificationProblem(mdp, spec, policy) - V_mc, k, res = solve(prob) - @test V ≈ V_mc + # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP + prob = VerificationProblem(mdp, spec, policy) + V_mc, k, res = solve(prob, alg) + @test V ≈ V_mc + end end end diff --git a/test/sparse/factored.jl b/test/sparse/factored.jl index 4781ea82..f7649676 100644 --- a/test/sparse/factored.jl +++ b/test/sparse/factored.jl @@ -160,24 +160,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) + Vres_first_McCormick = zeros(N, 2, 
3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, ) epsilon = N == Float32 ? 1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -186,11 +186,11 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -199,7 +199,50 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax end #### Minimization @@ -216,24 +259,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) + Vres_first_McCormick = zeros(N, 2, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -242,11 +285,11 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -255,38 +298,82 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax end end + + @testset "bellman 2d partial dependence" begin state_vars = (2, 3) - action_vars = (1,) - jₐ = CartesianIndex(1) + action_vars = (1, 2) marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ + lower = N[ 0 7//30 0 13//30 4//15 1//6 - 2//5 7//30 0 11//30 2//15 1//10 - ]), - upper = sparse(N[ + 2//5 7//30 0 11//30 2//15 0 + ], + upper = N[ 17//30 7//10 2//3 4//5 7//10 2//3 9//10 13//15 9//10 5//6 4//5 14//15 - ]) + ] ), (1, 2), (1,), (2, 3), (1,)) marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//30 1//3 1//6 - 4//15 1//4 1//6 - 2//15 7//30 0 - ]), - upper = sparse(N[ - 2//3 7//15 4//5 - 23//30 4//5 23//30 - 7//15 4//5 23//30 - ]) - ), (2,), (1,), (3,), (1,)) + lower = N[ + 0 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 0 2//15 0 + 2//15 7//30 0 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ] + ), (2,), (2,), (3,), (2,)) mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) @@ -297,37 +384,146 @@ using Random: MersenneTwister eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) #### Maximization - @testset "maximization" begin + @testset "max/max" begin 
V_vertex = [ maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) ) for jₛ in CartesianIndices(state_vars) - ] # The maximum will always be a vertex + ] # The (inner) maximum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_McCormick = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_McCormick, + V, + mdp; + upper_bound = true, + maximize = true, + ) + + epsilon = N == Float32 ? 1e-5 : 1e-8 + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + maximize = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax + end + + @testset "min/max" begin + V_vertex = [ + minimum( + maximum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) + ) for jₛ in CartesianIndices(state_vars) + ] # The (inner) maximum will always be a vertex ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) + Vres_first_McCormick = zeros(N, 2, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, + maximize = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -335,12 +531,13 @@ using Random: MersenneTwister V, mdp; upper_bound = true, + maximize = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -348,42 +545,198 @@ using Random: MersenneTwister V, mdp; upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + maximize = false, ) - @test Vres ≈ Vres_first + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax end #### Minimization - @testset "minimization" begin + @testset "min/min" begin V_vertex = [ minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ] # The (inner) minimum will always be a vertex + + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_McCormick = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_McCormick, + V, + mdp; + upper_bound = false, + maximize = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + maximize = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = false, + ) + @test Vres ≈ Vres_first_OMax + end + + @testset "max/min" begin + V_vertex = [ + maximum( + minimum( + splat(eval_vertices), + Iterators.product( + IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), + IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) + ) + ) for jₐ in CartesianIndices(action_vars) + ) for jₛ in CartesianIndices(state_vars) + ] # The (inner) minimum will always be a vertex ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 2, 3) - IntervalMDP._bellman_helper!( + Vres_first_McCormick = zeros(N, 2, 3) + IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, + maximize = true, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -391,12 +744,13 @@ using Random: MersenneTwister V, mdp; upper_bound = false, + maximize = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -404,8 +758,55 @@ using Random: MersenneTwister V, mdp; upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + maximize = true, ) - @test Vres ≈ Vres_first + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + maximize = true, + ) + @test Vres ≈ Vres_first_OMax end end @@ -504,24 +905,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) + Vres_first_McCormick = zeros(N, 3, 3, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -530,11 +931,54 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) IntervalMDP.bellman!( ws, strategy_cache, @@ -543,7 +987,7 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_OMax end #### Minimization @@ -561,24 +1005,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( + Vres_first_McCormick = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -587,11 +1031,41 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) IntervalMDP.bellman!( ws, strategy_cache, @@ -600,7 +1074,20 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax end end @@ -699,24 +1186,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) + Vres_first_McCormick = zeros(N, 3, 3, 3) IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = true, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .+ epsilon .>= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -725,11 +1212,11 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -738,7 +1225,50 @@ using Random: MersenneTwister mdp; upper_bound = true, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = true, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .+ epsilon .>= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = true, + ) + @test Vres ≈ Vres_first_OMax end #### Minimization @@ -756,24 +1286,24 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres_first = zeros(N, 3, 3, 3) - IntervalMDP._bellman_helper!( + Vres_first_McCormick = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( ws, strategy_cache, - Vres_first, + Vres_first_McCormick, V, mdp; upper_bound = false, ) epsilon = N == Float32 ? 
1e-5 : 1e-8 - @test all(Vres_first .>= 0.0) - @test all(Vres_first .<= maximum(V)) - @test all(Vres_first .- epsilon .<= V_vertex) + @test all(Vres_first_McCormick .>= 0.0) + @test all(Vres_first_McCormick .<= maximum(V)) + @test all(Vres_first_McCormick .- epsilon .<= V_vertex) ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( ws, strategy_cache, @@ -782,11 +1312,54 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) - Vres = similar(Vres_first) + Vres = similar(Vres_first_McCormick) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_McCormick + + ws = IntervalMDP.construct_workspace(mdp, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres_first_OMax = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres_first_OMax, + V, + mdp; + upper_bound = false, + ) + + epsilon = N == Float32 ? 
1e-5 : 1e-8 + @test all(Vres_first_OMax .>= 0.0) + @test all(Vres_first_OMax .<= maximum(V)) + @test all(Vres_first_OMax .- epsilon .<= V_vertex) + + ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + mdp; + upper_bound = false, + ) + @test Vres ≈ Vres_first_OMax + + ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + Vres = similar(Vres_first_OMax) IntervalMDP.bellman!( ws, strategy_cache, @@ -795,7 +1368,7 @@ using Random: MersenneTwister mdp; upper_bound = false, ) - @test Vres ≈ Vres_first + @test Vres ≈ Vres_first_OMax end end From 87f652552ec72583e4d1f0ad06d92c786bfa2cf9 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Fri, 12 Sep 2025 00:06:53 +0200 Subject: [PATCH 11/71] Fix and test dense CUDA --- Project.toml | 3 + ext/IntervalMDPCudaExt.jl | 94 +- ext/cuda/array.jl | 14 +- ext/cuda/bellman/dense.jl | 264 ++-- ext/cuda/bellman/sparse.jl | 6 +- ext/cuda/interval_probabilities.jl | 67 - ext/cuda/probabilities.jl | 18 + ext/cuda/strategy.jl | 6 +- ext/cuda/workspace.jl | 32 +- src/IntervalMDP.jl | 1 + src/bellman.jl | 6 +- .../FactoredRobustMarkovDecisionProcess.jl | 24 +- src/probabilities/IntervalAmbiguitySets.jl | 24 +- src/probabilities/Marginal.jl | 32 +- src/utils.jl | 21 +- src/workspace.jl | 2 +- test/base/imdp.jl | 1 + test/cuda/cuda.jl | 10 +- test/cuda/dense/bellman.jl | 84 +- test/cuda/dense/imdp.jl | 1125 ++++++++++------- test/cuda/dense/synthesis.jl | 86 +- test/cuda/dense/vi.jl | 303 +++-- test/runtests.jl | 2 +- 23 files changed, 1243 insertions(+), 982 deletions(-) delete mode 100644 ext/cuda/interval_probabilities.jl create mode 100644 ext/cuda/probabilities.jl diff --git a/Project.toml b/Project.toml index cdd0854e..1b7c9882 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,9 @@ CUDA = 
"052768ef-5323-5732-b1bb-66c8b64840ba" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" +[extensions] +IntervalMDPCudaExt = ["Adapt", "CUDA", "GPUArrays", "LLVM"] + [compat] Adapt = "4" CUDA = "5.1" diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl index 9af2d4d3..328f01aa 100644 --- a/ext/IntervalMDPCudaExt.jl +++ b/ext/IntervalMDPCudaExt.jl @@ -8,89 +8,51 @@ using GPUArrays: AbstractGPUArray, AbstractGPUVector, AbstractGPUMatrix using IntervalMDP, LinearAlgebra -Adapt.@adapt_structure IntervalProbabilities -Adapt.@adapt_structure OrthogonalIntervalProbabilities +Adapt.@adapt_structure Marginal Adapt.@adapt_structure StationaryStrategy Adapt.@adapt_structure TimeVaryingStrategy -# Opinionated conversion to GPU with Float64 values and Int32 indices -IntervalMDP.cu(model) = adapt(IntervalMDP.CuModelAdaptor{Float64}, model) -IntervalMDP.cpu(model) = adapt(IntervalMDP.CpuModelAdaptor{Float64}, model) +# Opinionated conversion to GPU with preserved value types and Int32 indices +IntervalMDP.cu(model) = adapt(IntervalMDP.CuModelAdaptor{IntervalMDP.valuetype(model)}, model) +IntervalMDP.cpu(model) = adapt(IntervalMDP.CpuModelAdaptor{IntervalMDP.valuetype(model)}, model) function Adapt.adapt_structure( T::Type{<:IntervalMDP.CuModelAdaptor}, - mdp::IntervalMarkovDecisionProcess, + mdp::IntervalMDP.FactoredRMDP, ) - return IntervalMarkovDecisionProcess( - adapt(T, transition_prob(mdp)), - adapt(CuArray{Int32}, IntervalMDP.stateptr(mdp)), + return IntervalMDP.FactoredRMDP( + state_variables(mdp), + action_variables(mdp), + IntervalMDP.source_shape(mdp), + adapt(T, marginals(mdp)), adapt(CuArray{Int32}, initial_states(mdp)), - num_states(mdp), + Val(false) # check = false ) end function Adapt.adapt_structure( T::Type{<:IntervalMDP.CpuModelAdaptor}, - mdp::IntervalMarkovDecisionProcess, + mdp::IntervalMDP.FactoredRMDP, ) - return IntervalMarkovDecisionProcess( - adapt(T, transition_prob(mdp)), - 
adapt(Array{Int32}, IntervalMDP.stateptr(mdp)), + return IntervalMDP.FactoredRMDP( + state_variables(mdp), + action_variables(mdp), + IntervalMDP.source_shape(mdp), + adapt(T, marginals(mdp)), adapt(Array{Int32}, initial_states(mdp)), - num_states(mdp), + Val(false) # check = false ) end -function Adapt.adapt_structure( - T::Type{<:IntervalMDP.CuModelAdaptor}, - mdp::OrthogonalIntervalMarkovDecisionProcess, -) - return OrthogonalIntervalMarkovDecisionProcess( - adapt(T, transition_prob(mdp)), - adapt(CuArray{Int32}, IntervalMDP.stateptr(mdp)), - adapt(CuArray{Int32}, initial_states(mdp)), - num_states(mdp), - ) -end - -function Adapt.adapt_structure( - T::Type{<:IntervalMDP.CpuModelAdaptor}, - mdp::OrthogonalIntervalMarkovDecisionProcess, -) - return OrthogonalIntervalMarkovDecisionProcess( - adapt(T, transition_prob(mdp)), - adapt(Array{Int32}, IntervalMDP.stateptr(mdp)), - adapt(Array{Int32}, initial_states(mdp)), - num_states(mdp), - ) -end - -function Adapt.adapt_structure( - T::Type{<:IntervalMDP.CuModelAdaptor}, - mdp::MixtureIntervalMarkovDecisionProcess, -) - return MixtureIntervalMarkovDecisionProcess( - adapt(T, transition_prob(mdp)), - adapt(CuArray{Int32}, IntervalMDP.stateptr(mdp)), - adapt(CuArray{Int32}, initial_states(mdp)), - num_states(mdp), - ) -end - -function Adapt.adapt_structure( - T::Type{<:IntervalMDP.CpuModelAdaptor}, - mdp::MixtureIntervalMarkovDecisionProcess, -) - return MixtureIntervalMarkovDecisionProcess( - adapt(T, transition_prob(mdp)), - adapt(Array{Int32}, IntervalMDP.stateptr(mdp)), - adapt(Array{Int32}, initial_states(mdp)), - num_states(mdp), +function Adapt.adapt_structure(to, as::IntervalAmbiguitySets) + return IntervalAmbiguitySets( + adapt(to, as.lower), + adapt(to, as.gap), + Val(false) # check = false ) end -Adapt.adapt_structure(T::Type{<:IntervalMDP.CuModelAdaptor}, is::AllStates) = is -Adapt.adapt_structure(T::Type{<:IntervalMDP.CpuModelAdaptor}, is::AllStates) = is +Adapt.@adapt_structure IntervalMDP.AllStates 
function IntervalMDP.checkdevice(::AbstractGPUArray, ::AbstractGPUMatrix) # Both arguments are on the GPU. @@ -120,8 +82,8 @@ end IntervalMDP.arrayfactory( ::MR, T, - num_states, -) where {R, MR <: Union{CuSparseMatrixCSC{R}, CuArray{R}}} = CuArray{T}(undef, num_states) + sizes, +) where {R, MR <: Union{CuSparseMatrixCSC{R}, CuArray{R}}} = CuArray{T}(undef, sizes) include("cuda/utils.jl") include("cuda/array.jl") @@ -129,8 +91,8 @@ include("cuda/sorting.jl") include("cuda/workspace.jl") include("cuda/strategy.jl") include("cuda/bellman/dense.jl") -include("cuda/bellman/sparse.jl") -include("cuda/interval_probabilities.jl") +# include("cuda/bellman/sparse.jl") +include("cuda/probabilities.jl") include("cuda/specification.jl") end diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index cf6183ab..5d5ebd44 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -15,18 +15,6 @@ CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}(M::SparseMatrixCSC) where {Tv, Ti} = size(M), ) -const CuSparseColumnView{Tv, Ti} = SubArray{ - Tv, - 1, - CuSparseMatrixCSC{Tv, Ti}, - Tuple{Base.Slice{Base.OneTo{Int}}, Int}, - false, -} - -function SparseArrays.nnz(x::CuSparseColumnView) - rowidx, colidx = parentindices(x) - return length(nzrange(parent(x), colidx)) -end SparseArrays.nzrange(S::CuSparseMatrixCSC, col::Integer) = CUDA.@allowscalar(S.colPtr[col]):(CUDA.@allowscalar(S.colPtr[col + 1]) - 1) @@ -49,5 +37,5 @@ Adapt.adapt_storage( Adapt.adapt_storage(::Type{IntervalMDP.CpuModelAdaptor{Tv}}, x::CuArray{Tv}) where {Tv} = adapt(Array{Tv}, x) -Adapt.adapt_storage(::Type{IntervalMDP.CpuModelAdaptor{Tv}}, x::CuArray{Int32}) where {Tv} = +Adapt.adapt_storage(::Type{IntervalMDP.CpuModelAdaptor{Tv}}, x::CuArray{<:Integer}) where {Tv} = adapt(Array{Int32}, x) diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index 6140b0bf..08eb643e 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -1,25 +1,29 @@ function IntervalMDP._bellman_helper!( - 
workspace::CuDenseWorkspace, + workspace::CuDenseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, - Vres, - V, - prob::IntervalProbabilities{Tv}, - stateptr; + Vres::AbstractVector{Tv}, + V::AbstractVector{Tv}, + model; upper_bound = false, maximize = true, ) where {Tv} - max_states_per_block = 32 - shmem = - length(V) * (sizeof(Int32) + sizeof(Tv)) + - max_states_per_block * workspace.max_actions * sizeof(Tv) + n_actions = isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? workspace.num_actions : 1 + marginal = marginals(model)[1] + n_states = source_shape(marginal)[1] + + if IntervalMDP.valuetype(marginal) != Tv + throw(ArgumentError("Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).")) + end + + max_states_per_block = 32 # == num_warps + shmem = length(V) * (sizeof(Int32) + sizeof(Tv)) + max_states_per_block * n_actions * sizeof(Tv) kernel = @cuda launch = false dense_bellman_kernel!( workspace, active_cache(strategy_cache), Vres, V, - prob, - stateptr, + marginal, upper_bound ? (>=) : (<=), maximize ? 
(max, >, typemin(Tv)) : (min, <, typemax(Tv)), ) @@ -28,27 +32,23 @@ function IntervalMDP._bellman_helper!( max_threads = prevwarp(device(), config.threads) # Execution plan: - # - value assignment: 1 warp per state + # - value assignment: 1 warp per state/action pair + # - reduce over actions in the first warp for each state # - squeeze as many states as possible in a block # - use shared memory to store the values and permutation # - use bitonic sort to sort the values for all states in a block - num_states = length(stateptr) - one(Int32) - wanted_threads = min(1024, 32 * num_states) - - threads = min(max_threads, wanted_threads) - warps = div(threads, 32) - blocks = min(2^16 - 1, cld(num_states, warps)) - shmem = - length(V) * (sizeof(Int32) + sizeof(Tv)) + - warps * workspace.max_actions * sizeof(Tv) + threads_per_state = min(max_threads, 32 * n_actions) + states_per_block = min(n_states, div(max_threads, threads_per_state)) + threads = threads_per_state * states_per_block + blocks = min(2^16 - 1, cld(n_states, states_per_block)) + shmem = length(V) * (sizeof(Int32) + sizeof(Tv)) + states_per_block * n_actions * sizeof(Tv) kernel( workspace, active_cache(strategy_cache), Vres, V, - prob, - stateptr, + marginal, upper_bound ? (>=) : (<=), maximize ? 
(max, >, typemin(Tv)) : (min, <, typemax(Tv)); blocks = blocks, @@ -62,28 +62,21 @@ end function dense_bellman_kernel!( workspace, strategy_cache, - Vres, + Vres::AbstractVector{Tv}, V, - prob::IntervalProbabilities{Tv}, - stateptr, + marginal, value_lt, action_reduce, ) where {Tv} - assume(warpsize() == 32) - nwarps = div(blockDim().x, warpsize()) - wid = fld1(threadIdx().x, warpsize()) - # Prepare action workspace shared memory - action_workspace = CuDynamicSharedArray(Tv, (workspace.max_actions, nwarps)) - @inbounds action_workspace = @view action_workspace[:, wid] + tps = threads_per_state(workspace, strategy_cache) + states_per_block = div(blockDim().x, tps) + sid = fld1(threadIdx().x, tps) + + action_workspace = initialize_action_workspace(workspace, strategy_cache, V, states_per_block, sid) # Prepare sorting shared memory - value = CuDynamicSharedArray(Tv, length(V), nwarps * workspace.max_actions * sizeof(Tv)) - perm = CuDynamicSharedArray( - Int32, - length(V), - (nwarps * workspace.max_actions + length(V)) * sizeof(Tv), - ) + value, perm = initialize_value_and_perm(workspace, strategy_cache, V, marginal, states_per_block) # Perform sorting dense_initialize_sorting_shared_memory!(V, value, perm) @@ -91,19 +84,84 @@ function dense_bellman_kernel!( # O-maxmization dense_omaximization!( + workspace, action_workspace, strategy_cache, Vres, + V, + marginal, value, perm, - prob, - stateptr, + states_per_block, + sid, action_reduce, ) return nothing end +@inline function initialize_action_workspace( + workspace, + ::OptimizingActiveCache, + marginal, + states_per_block, + sid, +) + action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, states_per_block)) + @inbounds return @view action_workspace[:, sid] +end + +@inline function initialize_action_workspace( + workspace, + ::NonOptimizingActiveCache, + marginal, + states_per_block, + sid, +) + return nothing +end + +@inline function initialize_value_and_perm( + workspace, + 
::OptimizingActiveCache, + V::AbstractVector{Tv}, + marginal, + states_per_block, +) where {Tv} + Tv2 = IntervalMDP.valuetype(marginal) + value = CuDynamicSharedArray(Tv, length(V), workspace.num_actions * states_per_block * sizeof(Tv2)) + perm = CuDynamicSharedArray(Int32, length(V), workspace.num_actions * states_per_block * sizeof(Tv2) + length(V) * sizeof(Tv)) + return value, perm +end + +@inline function initialize_value_and_perm( + workspace, + ::NonOptimizingActiveCache, + V::AbstractVector{Tv}, + marginal, + states_per_block, +) where {Tv} + value = CuDynamicSharedArray(Tv, length(V)) + perm = CuDynamicSharedArray(Int32, length(V), length(V) * sizeof(Tv)) + return value, perm +end + +@inline function threads_per_state( + workspace, + ::OptimizingActiveCache, +) + assume(warpsize() == 32) + return min(blockDim().x, warpsize() * workspace.num_actions) +end + +@inline function threads_per_state( + workspace, + ::NonOptimizingActiveCache, +) + assume(warpsize() == 32) + return warpsize() +end + @inline function dense_initialize_sorting_shared_memory!(V, value, perm) # Copy into shared memory i = threadIdx().x @@ -118,106 +176,104 @@ end end @inline function dense_omaximization!( + workspace, action_workspace, strategy_cache, Vres, + V, + marginal, value, perm, - prob, - stateptr, + states_per_block, + sid, action_reduce, ) - assume(warpsize() == 32) - - warps = div(blockDim().x, warpsize()) - wid = fld1(threadIdx().x, warpsize()) - - num_states = length(stateptr) - one(Int32) - j = wid + (blockIdx().x - one(Int32)) * warps - @inbounds while j <= num_states + jₛ = sid + (blockIdx().x - one(Int32)) * states_per_block + @inbounds while jₛ <= source_shape(marginal)[1] # Grid-stride loop state_dense_omaximization!( + workspace, action_workspace, strategy_cache, Vres, + V, + marginal, value, perm, - prob, - stateptr, + jₛ, action_reduce, - j, ) - j += gridDim().x * warps + jₛ += gridDim().x * states_per_block end return nothing end @inline function 
state_dense_omaximization!( + workspace, action_workspace, strategy_cache::OptimizingActiveCache, - Vres, + Vres::AbstractVector{Tv}, + V, + marginal, value, perm, - prob::IntervalProbabilities{Tv}, - stateptr, - action_reduce, jₛ, + action_reduce, ) where {Tv} - lane = mod1(threadIdx().x, warpsize()) + assume(warpsize() == 32) + + tps = threads_per_state(workspace, strategy_cache) + nwarps_per_state = div(tps, warpsize()) - s₁, s₂ = stateptr[jₛ], stateptr[jₛ + one(Int32)] - nactions = s₂ - s₁ - @inbounds action_values = @view action_workspace[1:nactions] + warp, lane = fldmod1(threadIdx().x, warpsize()) + state_warp = mod1(warp, nwarps_per_state) - k = one(Int32) - @inbounds while k <= nactions - jₐ = s₁ + k - one(Int32) - lowerⱼ = @view lower(prob)[:, jₐ] - gapⱼ = @view gap(prob)[:, jₐ] - sum_lowerⱼ = sum_lower(prob)[jₐ] + jₐ = state_warp + @inbounds while jₐ <= action_shape(marginal)[1] + ambiguity_set = marginal[(jₐ,), (jₛ,)] # Use O-maxmization to find the value for the action - v = state_action_dense_omaximization!(value, perm, lowerⱼ, gapⱼ, sum_lowerⱼ, lane) + v = state_action_dense_omaximization!(V, value, perm, ambiguity_set, lane) if lane == one(Int32) - action_values[k] = v + action_workspace[jₐ] = v end sync_warp() - k += one(Int32) + jₐ += nwarps_per_state end # Find the best action - v = extract_strategy_warp!(strategy_cache, action_values, Vres, jₛ, action_reduce, lane) + if state_warp == one(Int32) + v = extract_strategy_warp!(strategy_cache, action_workspace, Vres, jₛ, action_reduce, lane) - if lane == one(Int32) - Vres[jₛ] = v + if lane == one(Int32) + Vres[jₛ] = v + end end - sync_warp() + sync_threads() end @inline function state_dense_omaximization!( + workspace, action_workspace, strategy_cache::NonOptimizingActiveCache, - Vres, + Vres::AbstractVector{Tv}, + V, + marginal, value, perm, - prob::IntervalProbabilities{Tv}, - stateptr, - action_reduce, jₛ, + action_reduce, ) where {Tv} lane = mod1(threadIdx().x, warpsize()) @inbounds begin - s₁ = 
stateptr[jₛ] - jₐ = s₁ + strategy_cache[jₛ] - one(Int32) - lowerⱼ = @view lower(prob)[:, jₐ] - gapⱼ = @view gap(prob)[:, jₐ] - sum_lowerⱼ = sum_lower(prob)[jₐ] + jₐ = Int32.(strategy_cache[jₛ]) + ambiguity_set = marginal[jₐ, (jₛ,)] # Use O-maxmization to find the value for the action - v = state_action_dense_omaximization!(value, perm, lowerⱼ, gapⱼ, sum_lowerⱼ, lane) + v = state_action_dense_omaximization!(V, value, perm, ambiguity_set, lane) if lane == one(Int32) Vres[jₛ] = v @@ -227,42 +283,42 @@ end end @inline function state_action_dense_omaximization!( + V, value, perm, - lower, - gap, - sum_lower::Tv, + ambiguity_set::IntervalMDP.IntervalAmbiguitySet{R, MR}, lane, -) where {Tv} +) where {R, MR <: AbstractArray} assume(warpsize() == 32) - warp_aligned_length = kernel_nextwarp(length(lower)) - remaining = one(Tv) - sum_lower - gap_value = zero(Tv) + warp_aligned_length = kernel_nextwarp(IntervalMDP.supportsize(ambiguity_set)) + used = zero(R) + gap_value = zero(R) # Add the lower bound multiplied by the value s = lane @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value - if s <= length(lower) - p = perm[s] - - gap_value += lower[p] * value[s] + if s <= IntervalMDP.supportsize(ambiguity_set) + gap_value += lower(ambiguity_set, s) * V[s] + used += lower(ambiguity_set, s) end - s += warpsize() end + used = CUDA.reduce_warp(+, used) + used = shfl_sync(0xffffffff, used, one(Int32)) + remaining = one(R) - used sync_warp() # Add the gap multiplied by the value s = lane @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding gap - g = if s <= length(gap) - gap[perm[s]] + g = if s <= IntervalMDP.supportsize(ambiguity_set) + gap(ambiguity_set, perm[s]) else # 0 gap is a neural element - zero(Tv) + zero(R) end # Cummulatively sum the gap with a tree reduction @@ -273,8 +329,8 @@ end remaining += g # Update the probability - if s <= 
length(gap) - g = clamp(remaining, zero(Tv), g) + if s <= IntervalMDP.supportsize(ambiguity_set) + g = clamp(remaining, zero(R), g) gap_value += g * value[s] remaining -= g end @@ -283,7 +339,7 @@ end remaining = shfl_sync(0xffffffff, remaining, warpsize()) # Early exit if the remaining probability is zero - if remaining <= zero(Tv) + if remaining <= zero(R) break end @@ -293,4 +349,4 @@ end gap_value = CUDA.reduce_warp(+, gap_value) return gap_value -end +end \ No newline at end of file diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index 8f658e76..13227f09 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -1,5 +1,5 @@ function IntervalMDP._bellman_helper!( - workspace::CuSparseWorkspace, + workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, Vres, V, @@ -77,7 +77,7 @@ function IntervalMDP._bellman_helper!( end function try_small_sparse_bellman!( - workspace::CuSparseWorkspace, + workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, Vres, V, @@ -422,7 +422,7 @@ end function try_large_sparse_bellman!( ::Type{T1}, ::Type{T2}, - workspace::CuSparseWorkspace, + workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, Vres, V, diff --git a/ext/cuda/interval_probabilities.jl b/ext/cuda/interval_probabilities.jl deleted file mode 100644 index a1c4898a..00000000 --- a/ext/cuda/interval_probabilities.jl +++ /dev/null @@ -1,67 +0,0 @@ - -function IntervalMDP.compute_gap( - lower::M, - upper::M, -) where {Tv, Ti, M <: CuSparseMatrixCSC{Tv, Ti}} - # FIXME: This is an ugly, non-robust hack. 
- upper = SparseMatrixCSC(upper) - lower = SparseMatrixCSC(lower) - lower, gap = IntervalMDP.compute_gap(lower, upper) - return adapt(IntervalMDP.CuModelAdaptor{Tv}, lower), - adapt(IntervalMDP.CuModelAdaptor{Tv}, gap) -end - -function IntervalMDP.interval_prob_hcat( - transition_probs::Vector{ - <:IntervalProbabilities{Tv, <:AbstractVector{Tv}, <:CuSparseMatrixCSC{Tv, Ti}}, - }, -) where {Tv, Ti} - num_dest = size(lower(first(transition_probs)), 1) - - @assert all(x -> size(lower(x), 1) == num_dest, transition_probs) "The dimensions of all matrices must be the same" - @assert all(x -> size(gap(x), 1) == num_dest, transition_probs) "The dimensions of all matrices must be the same" - - num_col = mapreduce(x -> size(lower(x), 2), +, transition_probs) - dims = (num_dest, num_col) - - l = map(lower, transition_probs) - - l_colptr = CUDA.zeros(Ti, num_col + 1) - nnz_sofar = 0 - nX_sofar = 0 - @inbounds for i in eachindex(l) - li = l[i] - nX = size(li, 2) - l_colptr[(1:(nX + 1)) .+ nX_sofar] = li.colPtr .+ nnz_sofar - nnz_sofar += nnz(li) - nX_sofar += nX - end - - l_rowval = mapreduce(lower -> lower.rowVal, vcat, l) - l_nzval = mapreduce(lower -> lower.nzVal, vcat, l) - l = CuSparseMatrixCSC(l_colptr, l_rowval, l_nzval, dims) - - g = map(gap, transition_probs) - - g_colptr = CUDA.zeros(Ti, num_col + 1) - nnz_sofar = 0 - nX_sofar = 0 - @inbounds for i in eachindex(g) - gi = g[i] - nX = size(gi, 2) - g_colptr[(1:(nX + 1)) .+ nX_sofar] = gi.colPtr .+ nnz_sofar - nnz_sofar += nnz(gi) - nX_sofar += nX - end - - g_rowval = mapreduce(lower -> lower.rowVal, vcat, g) - g_nzval = mapreduce(lower -> lower.nzVal, vcat, g) - g = CuSparseMatrixCSC(g_colptr, g_rowval, g_nzval, dims) - - sl = mapreduce(sum_lower, vcat, transition_probs) - - lengths = map(num_source, transition_probs) - stateptr = CuVector{Ti}([1; cumsum(lengths) .+ 1]) - - return IntervalProbabilities(l, g, sl), stateptr -end diff --git a/ext/cuda/probabilities.jl b/ext/cuda/probabilities.jl new file mode 100644 index 
00000000..bcd2fc84 --- /dev/null +++ b/ext/cuda/probabilities.jl @@ -0,0 +1,18 @@ + +function IntervalMDP.compute_gap( + lower::M, + upper::M, +) where {Tv, M <: CuSparseMatrixCSC{Tv}} + # FIXME: This is an ugly, non-robust hack. + upper = SparseMatrixCSC(upper) + lower = SparseMatrixCSC(lower) + lower, gap = IntervalMDP.compute_gap(lower, upper) + + return adapt(IntervalMDP.CuModelAdaptor{Tv}, lower), adapt(IntervalMDP.CuModelAdaptor{Tv}, gap) +end + +IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuSparseDeviceMatrixCSC}}) where {R} = rowvals(p.gap) +IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuSparseDeviceMatrixCSC}}) where {R} = nnz(p.gap) + +IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuDeviceMatrix}}) where {R} = eachindex(p.gap) +IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuDeviceMatrix}}) where {R} = length(p.gap) diff --git a/ext/cuda/strategy.jl b/ext/cuda/strategy.jl index 326f15b4..2e17ac0d 100644 --- a/ext/cuda/strategy.jl +++ b/ext/cuda/strategy.jl @@ -7,7 +7,7 @@ Adapt.@adapt_structure NoStrategyActiveCache return NoStrategyActiveCache() end -struct TimeVaryingStrategyActiveCache{V <: AbstractVector{Int32}} <: OptimizingActiveCache +struct TimeVaryingStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: OptimizingActiveCache cur_strategy::V end Adapt.@adapt_structure TimeVaryingStrategyActiveCache @@ -15,7 +15,7 @@ Adapt.@adapt_structure TimeVaryingStrategyActiveCache return TimeVaryingStrategyActiveCache(strategy_cache.cur_strategy) end -struct StationaryStrategyActiveCache{V <: AbstractVector{Int32}} <: OptimizingActiveCache +struct StationaryStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: OptimizingActiveCache strategy::V end Adapt.@adapt_structure StationaryStrategyActiveCache @@ -25,7 +25,7 @@ end abstract type NonOptimizingActiveCache <: ActiveCache end -struct 
GivenStrategyActiveCache{V <: AbstractVector{Int32}} <: NonOptimizingActiveCache +struct GivenStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: NonOptimizingActiveCache strategy::V end Adapt.@adapt_structure GivenStrategyActiveCache diff --git a/ext/cuda/workspace.jl b/ext/cuda/workspace.jl index 750a9eaf..8afb61e0 100644 --- a/ext/cuda/workspace.jl +++ b/ext/cuda/workspace.jl @@ -3,29 +3,33 @@ abstract type AbstractCuWorkspace end ################### # Dense workspace # ################### -struct CuDenseWorkspace <: AbstractCuWorkspace - max_actions::Int32 +struct CuDenseOMaxWorkspace <: AbstractCuWorkspace + num_actions::Int32 end IntervalMDP.construct_workspace( - prob::IntervalProbabilities{R, VR, MR}, - max_actions = 1, -) where {R, VR, MR <: AbstractGPUMatrix{R}} = CuDenseWorkspace(max_actions) + prob::IntervalAmbiguitySets{R, MR}, + ::OMaximization; + num_actions = 1, + kwargs... +) where {R, MR <: AbstractGPUMatrix{R}} = CuDenseOMaxWorkspace(num_actions) #################### # Sparse workspace # #################### -struct CuSparseWorkspace <: AbstractCuWorkspace - max_nonzeros::Int32 - max_actions::Int32 +struct CuSparseOMaxWorkspace <: AbstractCuWorkspace + max_support::Int32 + num_actions::Int32 end -function CuSparseWorkspace(p::AbstractCuSparseMatrix, max_actions) - max_nonzeros = maximum(nnz, eachcol(p)) - return CuSparseWorkspace(max_nonzeros, max_actions) +function CuSparseOMaxWorkspace(p::IntervalAmbiguitySets, num_actions) + max_support = maximum(length ∘ IntervalMDP.support, p) + return CuSparseOMaxWorkspace(max_support, num_actions) end IntervalMDP.construct_workspace( - prob::IntervalProbabilities{R, VR, MR}, - max_actions = 1, -) where {R, VR, MR <: AbstractCuSparseMatrix{R}} = CuSparseWorkspace(gap(prob), max_actions) + prob::IntervalAmbiguitySets{R, MR}, + ::OMaximization; + num_actions = 1, + kwargs... 
+) where {R, MR <: AbstractCuSparseMatrix{R}} = CuSparseOMaxWorkspace(prob, num_actions) diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index af1fd77b..bf346fec 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -47,6 +47,7 @@ export VerificationProblem, ControlSynthesisProblem export value_function, residual, num_iterations include("cuda.jl") +public cu, cpu ### Solving include("algorithms.jl") diff --git a/src/bellman.jl b/src/bellman.jl index 4f6636e4..5a4d7ef8 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -395,7 +395,7 @@ Base.@propagate_inbounds function state_action_bellman( budget, upper_bound, ) - Vp_workspace = @view workspace.values_gaps[1:nnz(ambiguity_set)] + Vp_workspace = @view workspace.values_gaps[1:supportsize(ambiguity_set)] Vnonzero = @view V[support(ambiguity_set)] for (i, (v, p)) in enumerate(zip(Vnonzero, nonzeros(gap(ambiguity_set)))) Vp_workspace[i] = (v, p) @@ -546,7 +546,7 @@ Base.@propagate_inbounds function state_action_bellman( end function marginal_lp_constraints(model, ambiguity_set::IntervalAmbiguitySet{R}) where {R} - p = @variable(model, [1:length(support(ambiguity_set))]) + p = @variable(model, [1:supportsize(ambiguity_set)]) p_lower = map(i -> lower(ambiguity_set, i), support(ambiguity_set)) p_upper = map(i -> upper(ambiguity_set, i), support(ambiguity_set)) for i in eachindex(p) @@ -743,7 +743,7 @@ Base.@propagate_inbounds function orthogonal_inner_bellman!( budget, upper_bound::Bool, ) - Vp_workspace = @view workspace.values_gaps[1:length(support(ambiguity_set))] + Vp_workspace = @view workspace.values_gaps[1:supportsize(ambiguity_set)] @inbounds for (i, j) in enumerate(support(ambiguity_set)) Vp_workspace[i] = (V[j], gap(ambiguity_set, j)) end diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index d1ead9b8..1e9705eb 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ 
-18,15 +18,37 @@ struct FactoredRobustMarkovDecisionProcess{ action_vars::NTuple{M, Int32}, source_dims::NTuple{N, Int32}, transition::P, - initial_states::VI = nothing, + initial_states::VI, + check::Val{true}, ) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} check_rmdp(state_vars, action_vars, source_dims, transition, initial_states) return new{N, M, P, VI}(state_vars, action_vars, source_dims, transition, initial_states) end + + function FactoredRobustMarkovDecisionProcess( + state_vars::NTuple{N, Int32}, + action_vars::NTuple{M, Int32}, + source_dims::NTuple{N, Int32}, + transition::P, + initial_states::VI, + check::Val{false}, + ) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} + return new{N, M, P, VI}(state_vars, action_vars, source_dims, transition, initial_states) + end end const FactoredRMDP = FactoredRobustMarkovDecisionProcess +function FactoredRMDP( + state_vars::NTuple{N, Int32}, + action_vars::NTuple{M, Int32}, + source_dims::NTuple{N, Int32}, + transition::P, + initial_states::VI = AllStates(), +) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} + return FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, transition, initial_states, Val(true)) +end + function FactoredRMDP( state_vars::NTuple{N, <:Integer}, action_vars::NTuple{M, <:Integer}, diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 72803611..19ec0709 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -42,13 +42,19 @@ struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} <: PolytopicAmbiguitySe lower::MR gap::MR - function IntervalAmbiguitySets(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} + function IntervalAmbiguitySets(lower::MR, gap::MR, check::Val{true}) where {R, MR <: AbstractMatrix{R}} checkprobabilities(lower, gap) return new{R, MR}(lower, gap) end + + function IntervalAmbiguitySets(lower::MR, gap::MR, 
check::Val{false}) where {R, MR <: AbstractMatrix{R}} + return new{R, MR}(lower, gap) + end end +IntervalAmbiguitySets(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} = IntervalAmbiguitySets(lower, gap, Val(true)) + # Keyword constructor from lower and upper function IntervalAmbiguitySets(; lower::MR, upper::MR) where {MR <: AbstractMatrix} lower, gap = compute_gap(lower, upper) @@ -171,7 +177,7 @@ source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) action_shape(::IntervalAmbiguitySets) = (1,) marginals(p::IntervalAmbiguitySets) = (p,) -function Base.getindex(p::IntervalAmbiguitySets, j) +function Base.getindex(p::IntervalAmbiguitySets, j::Integer) # Select by columns only! l = @view p.lower[:, j] g = @view p.gap[:, j] @@ -179,14 +185,9 @@ function Base.getindex(p::IntervalAmbiguitySets, j) return IntervalAmbiguitySet(l, g) end -sub2ind(::IntervalAmbiguitySets, jₐ, jₛ) = jₛ -function Base.getindex(p::IntervalAmbiguitySets, jₐ, jₛ) - # Select by columns only! - l = @view p.lower[:, jₛ] - g = @view p.gap[:, jₛ] - - return p[jₛ] -end +sub2ind(::IntervalAmbiguitySets, jₐ::NTuple{M, T}, jₛ::NTuple{N, T}) where {N, M, T <: Integer} = T(jₛ[1]) +sub2ind(p::IntervalAmbiguitySets, jₐ::CartesianIndex, jₛ::CartesianIndex) = sub2ind(p, Tuple(jₐ), Tuple(jₛ)) +Base.getindex(p::IntervalAmbiguitySets, jₐ, jₛ) = p[sub2ind(p, jₐ, jₛ)] Base.iterate(p::IntervalAmbiguitySets) = (p[1], 2) function Base.iterate(p::IntervalAmbiguitySets, state) @@ -216,10 +217,11 @@ gap(p::IntervalAmbiguitySet, destination) = p.gap[destination] const ColumnView{Tv} = SubArray{Tv, 1, <:AbstractMatrix{Tv}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} support(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = eachindex(p.gap) +supportsize(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = length(p.gap) const SparseColumnView{Tv, Ti} = SubArray{Tv, 1, <:SparseArrays.AbstractSparseMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} support(p::IntervalAmbiguitySet{R, 
<:SparseColumnView{R}}) where {R} = rowvals(p.gap) -SparseArrays.nnz(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) +supportsize(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) # Vertex iterator for IntervalAmbiguitySet struct IntervalAmbiguitySetVertexIterator{R, VR <: AbstractVector{R}, P <: Permutations} diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 247eca35..7ff2e499 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -7,19 +7,19 @@ struct Marginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndices} source_dims::NTuple{N, Int32} action_vars::NTuple{M, Int32} linear_index::I +end - function Marginal( - ambiguity_sets::A, - state_indices::NTuple{N, Int32}, - action_indices::NTuple{M, Int32}, - source_dims::NTuple{N, Int32}, - action_vars::NTuple{M, Int32}, - ) where {A <: AbstractAmbiguitySets, N, M} - checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars) - - linear_index = LinearIndices((action_vars..., source_dims...)) - return new{A, N, M, typeof(linear_index)}(ambiguity_sets, state_indices, action_indices, source_dims, action_vars, linear_index) - end +function Marginal( + ambiguity_sets::A, + state_indices::NTuple{N, Int32}, + action_indices::NTuple{M, Int32}, + source_dims::NTuple{N, Int32}, + action_vars::NTuple{M, Int32}, +) where {A <: AbstractAmbiguitySets, N, M} + checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars) + + linear_index = LinearIndices((action_vars..., source_dims...)) + return Marginal(ambiguity_sets, state_indices, action_indices, source_dims, action_vars, linear_index) end function Marginal( @@ -79,15 +79,13 @@ source_shape(p::Marginal) = p.source_dims action_shape(p::Marginal) = p.action_vars num_target(p::Marginal) = num_target(ambiguity_sets(p)) -function Base.getindex(p::Marginal, source, action) - return ambiguity_sets(p)[sub2ind(p, source, action)] 
-end +Base.getindex(p::Marginal, source, action) = ambiguity_sets(p)[sub2ind(p, source, action)] sub2ind(p::Marginal, action::CartesianIndex, source::CartesianIndex) = sub2ind(p, Tuple(action), Tuple(source)) -function sub2ind(p::Marginal, action::NTuple{M, <:Integer}, source::NTuple{N, <:Integer}) where {N, M} +function sub2ind(p::Marginal, action::NTuple{M, T}, source::NTuple{N, T}) where {N, M, T <: Integer} action = getindex.((action,), p.action_indices) source = getindex.((source,), p.state_indices) j = p.linear_index[action..., source...] - return j + return T(j) end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index b071c7ca..0c7b7dd4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,14 +1,17 @@ -arrayfactory(mp::ProductProcess, T, num_states) = - arrayfactory(markov_process(mp), T, num_states) -arrayfactory(mp::FactoredRMDP, T, num_states) = - arrayfactory(marginals(mp)[1], T, num_states) -arrayfactory(marginal::Marginal, T, num_states) = - arrayfactory(ambiguity_sets(marginal), T, num_states) -arrayfactory(prob::IntervalAmbiguitySets, T, num_states) = - arrayfactory(prob.gap, T, num_states) -arrayfactory(::MR, T, num_states) where {MR <: AbstractArray} = Array{T}(undef, num_states) +arrayfactory(mp::ProductProcess, T, sizes) = + arrayfactory(markov_process(mp), T, sizes) +arrayfactory(mp::FactoredRMDP, T, sizes) = + arrayfactory(marginals(mp)[1], T, sizes) +arrayfactory(marginal::Marginal, T, sizes) = + arrayfactory(ambiguity_sets(marginal), T, sizes) +arrayfactory(prob::IntervalAmbiguitySets, T, sizes) = + arrayfactory(prob.gap, T, sizes) +arrayfactory(::MR, T, sizes) where {MR <: AbstractArray} = Array{T}(undef, sizes) valuetype(mp::ProductProcess) = valuetype(markov_process(mp)) valuetype(mp::FactoredRMDP) = promote_type(valuetype.(marginals(mp))...) 
valuetype(marginal::Marginal) = valuetype(ambiguity_sets(marginal)) valuetype(::IntervalAmbiguitySets{R}) where {R} = R +valuetype(::AbstractArray{R}) where {R} = R +valuetype(::FiniteTimeReward{R}) where {R} = R +valuetype(::InfiniteTimeReward{R}) where {R} = R \ No newline at end of file diff --git a/src/workspace.jl b/src/workspace.jl index 5e24d5a6..c83f94d0 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -99,7 +99,7 @@ struct SparseIntervalOMaxWorkspace{T <: Real} end function SparseIntervalOMaxWorkspace(ambiguity_sets::IntervalAmbiguitySets{R}, nactions) where {R <: Real} - max_support = maximum(nnz, ambiguity_sets) + max_support = maximum(supportsize, ambiguity_sets) budget = 1 .- vec(sum(ambiguity_sets.lower; dims = 1)) scratch = Vector{Tuple{R, R}}(undef, max_support) diff --git a/test/base/imdp.jl b/test/base/imdp.jl index 1b94a3f2..a58489fd 100644 --- a/test/base/imdp.jl +++ b/test/base/imdp.jl @@ -50,6 +50,7 @@ using IntervalMDP mdp = IntervalMarkovDecisionProcess(transition_probs) @testset "bellman" begin + # TODO: Add tests with min upper bound V = N[1, 2, 3] Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index f809cce9..38e7a8f6 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -18,11 +18,11 @@ test_files = [ "dense/bellman.jl", "dense/vi.jl", "dense/imdp.jl", - "dense/synthesis.jl", - "sparse/bellman.jl", - "sparse/vi.jl", - "sparse/imdp.jl", - "sparse/synthesis.jl", + # "dense/synthesis.jl", + # "sparse/bellman.jl", + # "sparse/vi.jl", + # "sparse/imdp.jl", + # "sparse/synthesis.jl", ] if CUDA.functional() diff --git a/test/cuda/dense/bellman.jl b/test/cuda/dense/bellman.jl index 6ef3122c..071de15e 100644 --- a/test/cuda/dense/bellman.jl +++ b/test/cuda/dense/bellman.jl @@ -1,50 +1,46 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA +using 
IntervalMDP, CUDA -for N in [Float32, Float64] - @testset "N = $N" begin - prob = IntervalProbabilities(; - lower = N[0 1//2; 1//10 3//10; 2//10 1//10], - upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], - ) - prob = IntervalMDP.cu(prob) +@testset for N in [Float32, Float64] + prob = IntervalAmbiguitySets(; + lower = N[0 1//2; 1//10 3//10; 2//10 1//10], + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], + ) + prob = IntervalMDP.cu(prob) - V = IntervalMDP.cu(N[1, 2, 3]) + V = IntervalMDP.cu(N[1, 2, 3]) - #### Maximization - @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - Vres = Vector(Vres) # Convert to CPU for testing - @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - end + #### Maximization + @testset "maximization" begin + ws = IntervalMDP.construct_workspace(prob, OMaximization()) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = CUDA.zeros(N, 2) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = true, + ) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing + @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] + end - #### Minimization - @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - Vres = Vector(Vres) # Convert to CPU for testing - @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] - end + #### Minimization + @testset "minimization" begin + ws = IntervalMDP.construct_workspace(prob, OMaximization()) + 
strategy_cache = IntervalMDP.construct_strategy_cache(prob) + Vres = CUDA.zeros(N, 2) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + Vres, + V, + prob; + upper_bound = false, + ) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing + @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] end -end +end \ No newline at end of file diff --git a/test/cuda/dense/imdp.jl b/test/cuda/dense/imdp.jl index 42ea0f58..fecf528c 100644 --- a/test/cuda/dense/imdp.jl +++ b/test/cuda/dense/imdp.jl @@ -1,511 +1,664 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA - -prob1 = IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], -) - -prob2 = IntervalProbabilities(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], -) - -prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 - 1.0 -][:, :], upper = [ - 0.0 - 0.0 - 1.0 -][:, :]) - -transition_probs = [prob1, prob2, prob3] -istates = [Int32(1)] - -mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs, istates)) -@test Vector(initial_states(mdp)) == istates - -mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) - -@testset "explicit sink state" begin - transition_prob, _ = IntervalMDP.interval_prob_hcat(transition_probs) - @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) - - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = 
Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 +using IntervalMDP, CUDA + +@testset for N in [Float32, Float64] + prob1 = IntervalAmbiguitySets(; + lower = N[ + 0 1//2 + 1//10 3//10 + 1//5 1//10 + ], + upper = N[ + 1//2 7//10 + 3//5 1//2 + 7//10 3//10 + ], + ) + + prob2 = IntervalAmbiguitySets(; + lower = N[ + 1//10 1//5 + 1//5 3//10 + 3//10 2//5 + ], + upper = N[ + 3//5 3//5 + 1//2 1//2 + 2//5 2//5 + ], + ) + + prob3 = IntervalAmbiguitySets(; + lower = N[ + 0 0 + 0 0 + 1 1 + ], + upper = N[ + 0 0 + 0 0 + 1 1 + ] + ) + + transition_probs = [prob1, prob2, prob3] + istates = [1] + + mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs, istates)) + @test IntervalMDP.cpu(initial_states(mdp)) == istates + + mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) + + @testset "bellman" begin + V = IntervalMDP.cu(N[1, 2, 3]) + Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + + Vres = IntervalMDP.cu(similar(Vres)) + IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + 
(3//10) * 2 + (2//5) * 3, 1 * 3] end - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - V_conv = Vector(V_conv) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - @test V_conv[3] == 1.0 + @testset "explicit sink state" begin + transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) + @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) + + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + end + + # Infinite time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, 
spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test V_conv[3] == N(1) + end + + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + # Compare exact time to finite time + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, 
Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + end + + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + @test V_conv[3] == N(1) + @test V_conv[2] == N(0) + end + + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) 
+ @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + # Compare exact time to finite time + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reward + @testset "finite time reward" begin + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, 0], N(9//10), 10)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= 
V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Infinite time reward + @testset "infinite time reward" begin + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + end + + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + V_conv1 = IntervalMDP.cpu(V_conv1) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + V_conv2 = IntervalMDP.cpu(V_conv2) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + V_conv1 = IntervalMDP.cpu(V_conv1) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + V_conv2 = IntervalMDP.cpu(V_conv2) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= 
V_conv2) + @test V_conv2[3] == N(0) + end end - # Finite time reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - @test V_fixed_it1[2] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 - @test V_fixed_it2[2] == 0.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - @test V_fixed_it1[3] == 1.0 - @test V_fixed_it1[2] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - @test V_fixed_it2[3] == 1.0 - @test V_fixed_it2[2] == 0.0 - end + @testset "implicit sink state" begin + transition_probs = [prob1, prob2] + implicit_mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - V_conv = Vector(V_conv) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - @test V_conv[3] == 1.0 - @test V_conv[2] == 0.0 - end + # Finite time 
reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - V_conv = Vector(V_conv) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - end + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - # Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - - spec = Specification(prop, 
Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - V_conv1 = Vector(V_conv1) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - V_conv2 = Vector(V_conv2) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - V_conv1 = Vector(V_conv1) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - V_conv2 = Vector(V_conv2) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - end -end - -@testset "implicit sink state" begin - transition_probs = [prob1, prob2] - implicit_mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) - - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = 
solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = 
solve(problem) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Finite time reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for 
testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res 
= solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 10) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = 
solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + # Infinite time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = 
VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ 
IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = 
Specification(prop, Pessimistic, Maximize) + + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = 
solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Finite time reward + @testset "finite time reward" begin + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, 0], N(9//10), 10)) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + 
end + + # Infinite time reward + @testset "infinite time reward" begin + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end - # Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end end -end +end \ No newline at end of file diff --git a/test/cuda/dense/synthesis.jl b/test/cuda/dense/synthesis.jl index 673d1823..b62b666a 100644 --- a/test/cuda/dense/synthesis.jl +++ b/test/cuda/dense/synthesis.jl @@ -1,7 +1,8 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA +using 
IntervalMDP, CUDA -prob1 = IntervalProbabilities(; + +prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.1 0.3 @@ -14,7 +15,7 @@ prob1 = IntervalProbabilities(; ], ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = [ 0.1 0.2 0.2 0.3 @@ -27,32 +28,42 @@ prob2 = IntervalProbabilities(; ], ) -prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 - 1.0 -][:, :], upper = [ - 0.0 - 0.0 - 1.0 -][:, :]) +prob3 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ], + upper = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ] +) transition_probs = [prob1, prob2, prob3] istates = [Int32(1)] -mdp = IntervalMarkovDecisionProcess(transition_probs, istates) -mdp = IntervalMDP.cu(mdp) +mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs, istates)) # Finite time reachability prop = FiniteTimeReachability([3], 10) spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) -policy, V, k, res = solve(problem) +sol = solve(problem) +policy, V, k, res = sol + +@test strategy(sol) == policy +@test value_function(sol) == V +@test num_iterations(sol) == k +@test residual(sol) == res + +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test Vector(policy[k]) == [1, 2, 1] + @test policy[k] == [(1,), (2,), (1,)] end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP @@ -62,21 +73,22 @@ V_mc, k, res = solve(problem) # Finite time reward prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) -prop = IntervalMDP.cu(prop) spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) - policy, V, k, res = solve(problem) +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing + +@test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test 
Vector(policy[k]) == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = solve(problem) -@test V ≈ V_mc +@test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_mc) atol=1e-5 # Infinite time reachability prop = InfiniteTimeReachability([3], 1e-6) @@ -84,13 +96,15 @@ spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing + @test policy isa StationaryStrategy -@test IntervalMDP.cpu(policy)[1] == [1, 2, 1] +@test policy[1] == [(1,), (2,), (1,)] # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = solve(problem) -@test V ≈ V_mc +@test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_mc) atol=1e-5 # Finite time safety prop = FiniteTimeSafety([3], 10) @@ -98,14 +112,36 @@ spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing +V = IntervalMDP.cpu(V) # Convert to CPU for testing + @test all(V .>= 0.0) -@test CUDA.@allowscalar(V[3]) ≈ 0.0 +@test V[3] ≈ 0.0 @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:(time_length(policy) - 1) - @test Vector(policy[k]) == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # The last time step (aka. the first value iteration step) has a different strategy. 
-@test Vector(policy[time_length(policy)]) == [2, 1, 1] +@test policy[time_length(policy)] == [(2,), (1,), (1,)] + +@testset "implicit sink state" begin + transition_probs = [prob1, prob2] + mdp = IntervalMarkovDecisionProcess(transition_probs) + + # Finite time reachability + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(mdp, spec) + policy, V, k, res = solve(problem) + + policy = IntervalMDP.cpu(policy) # Convert to CPU for testing + + @test policy isa TimeVaryingStrategy + @test time_length(policy) == 10 + for k in 1:time_length(policy) + @test policy[k] == [(1,), (2,)] + end +end diff --git a/test/cuda/dense/vi.jl b/test/cuda/dense/vi.jl index 1640dd77..11c4e8ee 100644 --- a/test/cuda/dense/vi.jl +++ b/test/cuda/dense/vi.jl @@ -1,110 +1,195 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA - -prob = IntervalProbabilities(; - lower = [ - 0.0 0.5 0.0 - 0.1 0.3 0.0 - 0.2 0.1 1.0 - ], - upper = [ - 0.5 0.7 0.0 - 0.6 0.5 0.0 - 0.7 0.3 1.0 - ], -) - -mc = IntervalMDP.cu(IntervalMarkovChain(prob, [1])) - -prop = FiniteTimeReachability([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReachability([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachability([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeReachAvoid([3], [2], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = 
FiniteTimeReachAvoid([3], [2], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachAvoid([3], [2], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeSafety([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeSafety([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = InfiniteTimeSafety([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = IntervalMDP.cu(FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = IntervalMDP.cu(FiniteTimeReward([2.0, 1.0, -1.0], 0.9, 10)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = IntervalMDP.cu(InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = IntervalMDP.cu(InfiniteTimeReward([2.0, 1.0, -1.0], 0.9, 1e-6)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) 
-@test maximum(u) <= 1e-6 +using IntervalMDP, CUDA + + +@testset for N in [Float32, Float64] + prob = IntervalAmbiguitySets(; + lower = N[ + 0 1//2 0 + 1//10 3//10 0 + 1//5 1//10 1 + ], + upper = N[ + 1//2 7//10 0 + 3//5 1//2 0 + 7//10 3//10 1 + ], + ) + + mc = IntervalMDP.cu(IntervalMarkovChain(prob, [1])) + @test IntervalMDP.cpu(initial_states(mc)) == [1] + + mc = IntervalMDP.cu(IntervalMarkovChain(prob)) + mc_cpu = IntervalMarkovChain(prob) # For comparison + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + sol = solve(problem) + V_fixed_it, k, res = sol + + @test value_function(sol) == V_fixed_it + @test num_iterations(sol) == k + @test residual(sol) == res + + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = FiniteTimeReachability([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 11 + @test all(V_fixed_it .<= V_fixed_it2) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeReachAvoid([3], 
[2], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, res = solve(problem) + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = FiniteTimeReachAvoid([3], [2], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it .<= V_fixed_it2) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeSafety([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, res = solve(problem) + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = FiniteTimeSafety([3], 11) + spec = 
Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it2 .<= V_fixed_it) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = InfiniteTimeSafety([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, 0], N(9//10), 10)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, res = solve(problem) + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, -1], N(9//10), 10)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it2 .<= V_fixed_it) + + prop = FiniteTimeReward(N[2, 1, -1], N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k 
== k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, -1], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7f85a7d8..67a5b9b5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,5 +4,5 @@ using Test @testset verbose = true "base" include("base/base.jl") @testset verbose = true "sparse" include("sparse/sparse.jl") @testset verbose = true "data" include("data/data.jl") - # @testset verbose = true "cuda" include("cuda/cuda.jl") + @testset verbose = true "cuda" include("cuda/cuda.jl") end From dc6d857e08a9ad27ae46c13f8713459745a4dcb9 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Fri, 12 Sep 2025 11:39:26 +0200 Subject: [PATCH 12/71] Prettify output of FactoredRMDPs including inferred properties --- Project.toml | 2 + src/IntervalMDP.jl | 1 + src/algorithms.jl | 3 +- .../FactoredRobustMarkovDecisionProcess.jl | 119 ++++++++++++++++-- test/sparse/imdp.jl | 2 +- test/sparse/vi.jl | 3 - 6 files changed, 113 insertions(+), 17 deletions(-) diff --git a/Project.toml b/Project.toml index 1b7c9882..6c7236e1 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" SparseArrays = 
"2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" [weakdeps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -33,6 +34,7 @@ JSON = "0.21.4" JuMP = "1.29.0" LLVM = "7, 8, 9" NCDatasets = "0.13, 0.14" +StyledStrings = "1.11.0" julia = "1.9" [extras] diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index bf346fec..9cf04a64 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -9,6 +9,7 @@ export solve using LinearAlgebra, SparseArrays using JuMP, HiGHS using Combinatorics: permutations, Permutations +using StyledStrings ### Utilities const UnionIndex = Union{<:Integer, <:Tuple} diff --git a/src/algorithms.jl b/src/algorithms.jl index f1d3bfb0..03ec0f0d 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -37,7 +37,8 @@ struct IntervalValueIteration <: ModelCheckingAlgorithm end # TODO: Consider topological value iteration as an alternative algorithm (infinite time only). ##### Default algorithm for solving Interval MDP problems -default_algorithm(problem::AbstractIntervalMDPProblem) = RobustValueIteration(default_bellman_algorithm(system(problem))) +default_algorithm(problem::AbstractIntervalMDPProblem) = default_algorithm(system(problem)) +default_algorithm(system::StochasticProcess) = RobustValueIteration(default_bellman_algorithm(system)) solve(problem::AbstractIntervalMDPProblem; kwargs...) = solve(problem, default_algorithm(problem); kwargs...) 
diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index 1e9705eb..2461fd24 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -130,19 +130,19 @@ function check_initial_states(state_vars, initial_states) end end -state_variables(rmdp::FactoredRMDP) = rmdp.state_vars -state_variables(rmdp::FactoredRMDP, r) = rmdp.state_vars[r] -action_variables(rmdp::FactoredRMDP) = rmdp.action_vars -num_states(rmdp::FactoredRMDP) = prod(state_variables(rmdp)) -num_actions(rmdp::FactoredRMDP) = prod(action_variables(rmdp)) -marginals(rmdp::FactoredRMDP) = rmdp.transition -initial_states(rmdp::FactoredRMDP) = rmdp.initial_states +state_variables(mdp::FactoredRMDP) = mdp.state_vars +state_variables(mdp::FactoredRMDP, r) = mdp.state_vars[r] +action_variables(mdp::FactoredRMDP) = mdp.action_vars +num_states(mdp::FactoredRMDP) = prod(state_variables(mdp)) +num_actions(mdp::FactoredRMDP) = prod(action_variables(mdp)) +marginals(mdp::FactoredRMDP) = mdp.transition +initial_states(mdp::FactoredRMDP) = mdp.initial_states source_shape(m::FactoredRMDP) = m.source_dims action_shape(m::FactoredRMDP) = m.action_vars -function Base.getindex(rmdp::FactoredRMDP, r) - return rmdp.transition[r] +function Base.getindex(mdp::FactoredRMDP, r) + return mdp.transition[r] end ### Model type analysis @@ -158,19 +158,114 @@ struct IsFPMDP <: ModelType end # Factored Polytopic MDP struct IsFRMDP <: ModelType end # Factored Robust MDP # Single marginal - special case -modeltype(rmdp::FactoredRMDP{1}) = modeltype(rmdp, isinterval(rmdp.transition[1])) +modeltype(mdp::FactoredRMDP{1}) = modeltype(mdp, isinterval(mdp.transition[1])) modeltype(::FactoredRMDP{1}, ::IsInterval) = IsIMDP() modeltype(::FactoredRMDP{1}, ::IsNotInterval) = IsRMDP() # General factored case # Check if all marginals are interval ambiguity sets -modeltype(rmdp::FactoredRMDP{N}) where {N} = modeltype(rmdp, 
isinterval.(rmdp.transition)) +modeltype(mdp::FactoredRMDP{N}) where {N} = modeltype(mdp, isinterval.(mdp.transition)) modeltype(::FactoredRMDP{N}, ::NTuple{N, IsInterval}) where {N} = IsFIMDP() # If not, check if all marginals are polytopic ambiguity sets -modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsInterval}) where {N} = modeltype(rmdp, ispolytopic.(rmdp.transition)) +modeltype(mdp::FactoredRMDP{N}, ::NTuple{N, AbstractIsInterval}) where {N} = modeltype(mdp, ispolytopic.(mdp.transition)) modeltype(::FactoredRMDP{N}, ::NTuple{N, IsPolytopic}) where {N} = IsFPMDP() # Otherwise, it is a general factored robust MDP modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsPolytopic}) where {N} = IsFRMDP() + + +### Pretty printing +function Base.show(io::IO, mime::MIME"text/plain", mdp::FactoredRMDP{N, M}) where {N, M} + println(io, styled"{code:FactoredRobustMarkovDecisionProcess}") + println(io, "├─ ", N, styled" state variables with cardinality: {magenta:$(state_variables(mdp))}") + println(io, "├─ ", M, styled" action variables with cardinality: {magenta:$(action_variables(mdp))}") + if initial_states(mdp) isa AllStates + println(io, "├─ ", styled"Initial states: {magenta:All states}") + else + println(io, "├─ ", styled"Initial states: {magenta:$(initial_states(mdp))}") + end + + println(io, "├─ ", styled"Transition marginals:") + prefix = "│ " + for (i, marginal) in enumerate(mdp.transition[1:end - 1]) + println(io, prefix, "├─ Marginal $i: ") + showmarginal(io, prefix * "│ ", marginal) + end + println(io, prefix, "└─ Marginal $(length(mdp.transition)): ") + showmarginal(io, prefix * " ", mdp.transition[end]) + + showinferred(io, mdp) +end + +function showmarginal(io::IO, prefix, marginal::Marginal{<:IntervalAmbiguitySets{R, MR}}) where {R, MR <: AbstractMatrix} + println(io, prefix, styled"├─ Ambiguity set type: Interval (dense, {code:$MR})") + println(io, prefix, styled"└─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = 
$(action_variables(marginal))}") +end + +function showmarginal(io::IO, prefix, marginal::Marginal{<:IntervalAmbiguitySets{R, MR}}) where {R, MR <: AbstractSparseMatrix} + println(io, prefix, styled"├─ Ambiguity set type: Interval (sparse, {code:$MR})") + println(io, prefix, styled"├─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = $(action_variables(marginal))}") + num_transitions = nnz(ambiguity_sets(marginal).gap) + max_support = maximum(supportsize, ambiguity_sets(marginal)) + println(io, prefix, styled"└─ Transitions: {magenta: $num_transitions (max support: $max_support)}") +end + +function showinferred(io::IO, mdp::FactoredRMDP) + println(io, "└─", styled"{red:Inferred properties}") + prefix = " " + showmodeltype(io, prefix, mdp) + println(io, prefix, "├─", styled"Number of states: {green:$(num_states(mdp))}") + println(io, prefix, "├─", styled"Number of actions: {green:$(num_actions(mdp))}") + + default_alg = default_algorithm(mdp) + showmcalgorithm(io, prefix, default_alg) + showbellmanalg(io, prefix, modeltype(mdp), bellman_algorithm(default_alg)) +end + +showmodeltype(io::IO, prefix, mdp::FactoredRMDP) = showmodeltype(io, prefix, modeltype(mdp)) + +function showmodeltype(io::IO, prefix, ::IsFIMDP) + println(io, prefix, "├─", styled"Model type: {green:Factored Interval MDP}") +end + +function showmodeltype(io::IO, prefix, ::IsFPMDP) + println(io, prefix, "├─", styled"Model type: {green:Factored Polytopic MDP}") +end + +function showmodeltype(io::IO, prefix, ::IsFRMDP) + println(io, prefix, "├─", styled"Model type: {green:Factored Robust MDP}") +end + +function showmodeltype(io::IO, prefix, ::IsIMDP) + println(io, prefix, "├─", styled"Model type: {green:Interval MDP}") +end + +function showmodeltype(io::IO, prefix, ::IsRMDP) + println(io, prefix, "├─", styled"Model type: {green:Robust MDP}") +end + +function showmcalgorithm(io::IO, prefix, ::RobustValueIteration) + println(io, prefix,"├─", styled"Default model checking 
algorithm: {green:Robust Value Iteration}") +end + +function showmcalgorithm(io::IO, prefix, _) + println(io, prefix,"├─", styled"Default model checking algorithm: {green:None}") +end + +function showbellmanalg(io::IO, prefix, ::IsIMDP,::OMaximization) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:O-Maximization}") +end + +function showbellmanalg(io::IO, prefix, ::IsFIMDP,::OMaximization) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Recursive O-Maximization}") +end + +function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::LPMcCormickRelaxation) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Binary tree LP McCormick Relaxation}") +end + +function showbellmanalg(io::IO, prefix, _, _) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:None}") +end \ No newline at end of file diff --git a/test/sparse/imdp.jl b/test/sparse/imdp.jl index ae862fb7..573e9677 100644 --- a/test/sparse/imdp.jl +++ b/test/sparse/imdp.jl @@ -1,5 +1,5 @@ using Revise, Test -using IntervalMDP +using IntervalMDP, SparseArrays @testset for N in [Float32, Float64, Rational{BigInt}] diff --git a/test/sparse/vi.jl b/test/sparse/vi.jl index e924044c..1d4eae8d 100644 --- a/test/sparse/vi.jl +++ b/test/sparse/vi.jl @@ -1,9 +1,6 @@ using Revise, Test using IntervalMDP, SparseArrays -using Revise, Test -using IntervalMDP - @testset for N in [Float32, Float64, Rational{BigInt}] prob = IntervalAmbiguitySets(; lower = sparse_hcat( From dc406c8e91497e42dd49f80d039c072d9c6f8366 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 09:48:37 +0200 Subject: [PATCH 13/71] Fix valuetype --- src/utils.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/utils.jl b/src/utils.jl index 0c7b7dd4..76a9f160 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -8,10 +8,24 @@ arrayfactory(prob::IntervalAmbiguitySets, T, sizes) = arrayfactory(prob.gap, T, sizes) 
arrayfactory(::MR, T, sizes) where {MR <: AbstractArray} = Array{T}(undef, sizes) +function valuetype(prob::AbstractIntervalMDPProblem) + spec_valuetype = valuetype(specification(prob)) + sys_valuetype = valuetype(system(prob)) + + if isnothing(spec_valuetype) + return sys_valuetype + end + + return promote_type(spec_valuetype, sys_valuetype) +end +valuetype(spec::Specification) = valuetype(system_property(spec)) + valuetype(mp::ProductProcess) = valuetype(markov_process(mp)) valuetype(mp::FactoredRMDP) = promote_type(valuetype.(marginals(mp))...) valuetype(marginal::Marginal) = valuetype(ambiguity_sets(marginal)) valuetype(::IntervalAmbiguitySets{R}) where {R} = R valuetype(::AbstractArray{R}) where {R} = R + +valuetype(::Property) = nothing valuetype(::FiniteTimeReward{R}) where {R} = R valuetype(::InfiniteTimeReward{R}) where {R} = R \ No newline at end of file From 889e65c13cf117a3c7ea4bb08d7c1cbc7908906c Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 11:54:24 +0200 Subject: [PATCH 14/71] Switch to 1 warp per state (rather than per state/action pair) for CuDenseOMax kernel --- benchmark/imdp/execute.jl | 5 ++- benchmark/imdp/problem.jl | 4 +- ext/cuda/bellman/dense.jl | 95 ++++++++++++++------------------------- 3 files changed, 38 insertions(+), 66 deletions(-) diff --git a/benchmark/imdp/execute.jl b/benchmark/imdp/execute.jl index 3668af8a..2b5431c6 100644 --- a/benchmark/imdp/execute.jl +++ b/benchmark/imdp/execute.jl @@ -1,5 +1,6 @@ using Revise, BenchmarkTools using IntervalMDP, CUDA -V_conv, _, u = value_iteration(prob) -display(@benchmark value_iteration(prob)) +V_conv, _, u = solve(prob) + +display(@benchmark solve(prob)) diff --git a/benchmark/imdp/problem.jl b/benchmark/imdp/problem.jl index 60049dbc..d543b952 100644 --- a/benchmark/imdp/problem.jl +++ b/benchmark/imdp/problem.jl @@ -4,6 +4,6 @@ using IntervalMDP, IntervalMDP.Data, SparseArrays, CUDA, Adapt path = joinpath(@__DIR__, "multiObj_robotIMDP.txt") mdp, 
terminal_states = read_bmdp_tool_file(path) -prop = InfiniteTimeReachability(terminal_states, 1e-6) +prop = FiniteTimeReachability(terminal_states, 100) spec = Specification(prop, Pessimistic, Maximize) -prob = Problem(mdp, spec) +prob = VerificationProblem(mdp, spec) diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index 08eb643e..4f017626 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -32,12 +32,12 @@ function IntervalMDP._bellman_helper!( max_threads = prevwarp(device(), config.threads) # Execution plan: - # - value assignment: 1 warp per state/action pair - # - reduce over actions in the first warp for each state + # - value assignment: 1 warp per state + # - reduce over actions # - squeeze as many states as possible in a block # - use shared memory to store the values and permutation # - use bitonic sort to sort the values for all states in a block - threads_per_state = min(max_threads, 32 * n_actions) + threads_per_state = 32 states_per_block = min(n_states, div(max_threads, threads_per_state)) threads = threads_per_state * states_per_block blocks = min(2^16 - 1, cld(n_states, states_per_block)) @@ -69,14 +69,10 @@ function dense_bellman_kernel!( action_reduce, ) where {Tv} # Prepare action workspace shared memory - tps = threads_per_state(workspace, strategy_cache) - states_per_block = div(blockDim().x, tps) - sid = fld1(threadIdx().x, tps) - - action_workspace = initialize_action_workspace(workspace, strategy_cache, V, states_per_block, sid) + action_workspace = initialize_action_workspace(workspace, strategy_cache, V) # Prepare sorting shared memory - value, perm = initialize_value_and_perm(workspace, strategy_cache, V, marginal, states_per_block) + value, perm = initialize_value_and_perm(workspace, strategy_cache, V, marginal) # Perform sorting dense_initialize_sorting_shared_memory!(V, value, perm) @@ -92,8 +88,6 @@ function dense_bellman_kernel!( marginal, value, perm, - states_per_block, - sid, 
action_reduce, ) @@ -103,20 +97,19 @@ end @inline function initialize_action_workspace( workspace, ::OptimizingActiveCache, - marginal, - states_per_block, - sid, + marginal ) - action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, states_per_block)) - @inbounds return @view action_workspace[:, sid] + assume(warpsize() == 32) + nwarps = div(blockDim().x, warpsize()) + wid = fld1(threadIdx().x, warpsize()) + action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, nwarps)) + @inbounds return @view action_workspace[:, wid] end @inline function initialize_action_workspace( workspace, ::NonOptimizingActiveCache, - marginal, - states_per_block, - sid, + marginal ) return nothing end @@ -125,12 +118,13 @@ end workspace, ::OptimizingActiveCache, V::AbstractVector{Tv}, - marginal, - states_per_block, + marginal ) where {Tv} + assume(warpsize() == 32) + nwarps = div(blockDim().x, warpsize()) Tv2 = IntervalMDP.valuetype(marginal) - value = CuDynamicSharedArray(Tv, length(V), workspace.num_actions * states_per_block * sizeof(Tv2)) - perm = CuDynamicSharedArray(Int32, length(V), workspace.num_actions * states_per_block * sizeof(Tv2) + length(V) * sizeof(Tv)) + value = CuDynamicSharedArray(Tv, length(V), workspace.num_actions * nwarps * sizeof(Tv2)) + perm = CuDynamicSharedArray(Int32, length(V), workspace.num_actions * nwarps * sizeof(Tv2) + length(V) * sizeof(Tv)) return value, perm end @@ -138,30 +132,13 @@ end workspace, ::NonOptimizingActiveCache, V::AbstractVector{Tv}, - marginal, - states_per_block, + marginal ) where {Tv} value = CuDynamicSharedArray(Tv, length(V)) perm = CuDynamicSharedArray(Int32, length(V), length(V) * sizeof(Tv)) return value, perm end -@inline function threads_per_state( - workspace, - ::OptimizingActiveCache, -) - assume(warpsize() == 32) - return min(blockDim().x, warpsize() * workspace.num_actions) -end - -@inline function threads_per_state( - workspace, - 
::NonOptimizingActiveCache, -) - assume(warpsize() == 32) - return warpsize() -end - @inline function dense_initialize_sorting_shared_memory!(V, value, perm) # Copy into shared memory i = threadIdx().x @@ -184,11 +161,12 @@ end marginal, value, perm, - states_per_block, - sid, action_reduce, ) - jₛ = sid + (blockIdx().x - one(Int32)) * states_per_block + assume(warpsize() == 32) + nwarps = div(blockDim().x, warpsize()) + wid = fld1(threadIdx().x, warpsize()) + jₛ = wid + (blockIdx().x - one(Int32)) * nwarps @inbounds while jₛ <= source_shape(marginal)[1] # Grid-stride loop state_dense_omaximization!( workspace, @@ -202,7 +180,7 @@ end jₛ, action_reduce, ) - jₛ += gridDim().x * states_per_block + jₛ += gridDim().x * nwarps end return nothing @@ -217,18 +195,14 @@ end marginal, value, perm, - jₛ, + jₛ::Int32, action_reduce, ) where {Tv} assume(warpsize() == 32) + lane = mod1(threadIdx().x, warpsize()) + nwarps = div(blockDim().x, warpsize()) - tps = threads_per_state(workspace, strategy_cache) - nwarps_per_state = div(tps, warpsize()) - - warp, lane = fldmod1(threadIdx().x, warpsize()) - state_warp = mod1(warp, nwarps_per_state) - - jₐ = state_warp + jₐ = one(Int32) @inbounds while jₐ <= action_shape(marginal)[1] ambiguity_set = marginal[(jₐ,), (jₛ,)] @@ -240,18 +214,16 @@ end end sync_warp() - jₐ += nwarps_per_state + jₐ += nwarps end # Find the best action - if state_warp == one(Int32) - v = extract_strategy_warp!(strategy_cache, action_workspace, Vres, jₛ, action_reduce, lane) + v = extract_strategy_warp!(strategy_cache, action_workspace, Vres, jₛ, action_reduce, lane) - if lane == one(Int32) - Vres[jₛ] = v - end + if lane == one(Int32) + Vres[jₛ] = v end - sync_threads() + sync_warp() end @inline function state_dense_omaximization!( @@ -263,7 +235,7 @@ end marginal, value, perm, - jₛ, + jₛ::Int32, action_reduce, ) where {Tv} lane = mod1(threadIdx().x, warpsize()) @@ -308,7 +280,6 @@ end used = CUDA.reduce_warp(+, used) used = shfl_sync(0xffffffff, used, 
one(Int32)) remaining = one(R) - used - sync_warp() # Add the gap multiplied by the value s = lane From daf718a825db91c38e6b8ce256e260c8425c1834 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 13:24:07 +0200 Subject: [PATCH 15/71] Add pretty printing of problems --- benchmark/imdp/execute.jl | 9 +- benchmark/imdp/problem.jl | 8 + ext/cuda/workspace.jl | 4 +- src/algorithms.jl | 25 +++ src/models/DFA.jl | 13 ++ .../FactoredRobustMarkovDecisionProcess.jl | 73 +++------ src/models/ProductProcess.jl | 11 +- src/probabilities/IntervalAmbiguitySets.jl | 27 ++++ src/probabilities/Marginal.jl | 10 ++ src/problem.jl | 17 ++ src/specification.jl | 152 ++++++++++++++---- src/strategy.jl | 11 ++ src/workspace.jl | 8 +- test/base/bellman.jl | 4 +- test/cuda/dense/bellman.jl | 4 +- test/sparse/bellman.jl | 4 +- 16 files changed, 282 insertions(+), 98 deletions(-) diff --git a/benchmark/imdp/execute.jl b/benchmark/imdp/execute.jl index 2b5431c6..cb868ae1 100644 --- a/benchmark/imdp/execute.jl +++ b/benchmark/imdp/execute.jl @@ -1,6 +1,11 @@ using Revise, BenchmarkTools using IntervalMDP, CUDA -V_conv, _, u = solve(prob) +cu_prob = IntervalMDP.cu(prob) -display(@benchmark solve(prob)) +function test() + CUDA.@sync solve(cu_prob) +end + +test() # Warm-up +display(@benchmark test()) diff --git a/benchmark/imdp/problem.jl b/benchmark/imdp/problem.jl index d543b952..ddfb9fc1 100644 --- a/benchmark/imdp/problem.jl +++ b/benchmark/imdp/problem.jl @@ -4,6 +4,14 @@ using IntervalMDP, IntervalMDP.Data, SparseArrays, CUDA, Adapt path = joinpath(@__DIR__, "multiObj_robotIMDP.txt") mdp, terminal_states = read_bmdp_tool_file(path) + +marginal = marginals(mdp)[1] +amb = IntervalAmbiguitySets( + Array(ambiguity_sets(marginal).lower), + Array(ambiguity_sets(marginal).gap), +) + +mdp = IntervalMarkovDecisionProcess(amb, num_actions(mdp), initial_states(mdp)) prop = FiniteTimeReachability(terminal_states, 100) spec = Specification(prop, Pessimistic, Maximize) prob = 
VerificationProblem(mdp, spec) diff --git a/ext/cuda/workspace.jl b/ext/cuda/workspace.jl index 8afb61e0..baf3f830 100644 --- a/ext/cuda/workspace.jl +++ b/ext/cuda/workspace.jl @@ -9,7 +9,7 @@ end IntervalMDP.construct_workspace( prob::IntervalAmbiguitySets{R, MR}, - ::OMaximization; + ::OMaximization = default_bellman_algorithm(prob); num_actions = 1, kwargs... ) where {R, MR <: AbstractGPUMatrix{R}} = CuDenseOMaxWorkspace(num_actions) @@ -29,7 +29,7 @@ end IntervalMDP.construct_workspace( prob::IntervalAmbiguitySets{R, MR}, - ::OMaximization; + ::OMaximization = default_bellman_algorithm(prob); num_actions = 1, kwargs... ) where {R, MR <: AbstractCuSparseMatrix{R}} = CuSparseOMaxWorkspace(prob, num_actions) diff --git a/src/algorithms.jl b/src/algorithms.jl index 03ec0f0d..5834216f 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -10,6 +10,22 @@ default_bellman_algorithm(::FactoredRMDP, ::IsIMDP) = OMaximization() default_bellman_algorithm(::FactoredRMDP, ::IsFIMDP) = LPMcCormickRelaxation() default_bellman_algorithm(::IntervalAmbiguitySets) = OMaximization() +function showbellmanalg(io::IO, prefix, ::IsIMDP,::OMaximization) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:O-Maximization}") +end + +function showbellmanalg(io::IO, prefix, ::IsFIMDP,::OMaximization) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Recursive O-Maximization}") +end + +function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::LPMcCormickRelaxation) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Binary tree LP McCormick Relaxation}") +end + +function showbellmanalg(io::IO, prefix, _, ::BellmanAlgorithm) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:None}") +end + abstract type ModelCheckingAlgorithm end ########################## @@ -42,3 +58,12 @@ default_algorithm(system::StochasticProcess) = RobustValueIteration(default_bell 
solve(problem::AbstractIntervalMDPProblem; kwargs...) = solve(problem, default_algorithm(problem); kwargs...) + + +function showmcalgorithm(io::IO, prefix, ::RobustValueIteration) + println(io, prefix,"├─", styled"Default model checking algorithm: {green:Robust Value Iteration}") +end + +function showmcalgorithm(io::IO, prefix, ::ModelCheckingAlgorithm) + println(io, prefix,"├─", styled"Default model checking algorithm: {green:None}") +end \ No newline at end of file diff --git a/src/models/DFA.jl b/src/models/DFA.jl index ba314b40..0d86a6b5 100644 --- a/src/models/DFA.jl +++ b/src/models/DFA.jl @@ -155,3 +155,16 @@ Base.getindex(dfa::DFA, q, w::String) = dfa[q, dfa.labelmap[w]] Base.iterate(dfa::DFA, state::Int32 = one(Int32)) = state > num_states(dfa) ? nothing : (state, state + one(Int32)) + +function showsystem(io::IO, first_prefix, prefix, dfa::DFA) + # TODO: Print diagram? + println(io, first_prefix, styled"{code:DFA}") + println(io, prefix, styled"├─ Number of states: {magenta:$(num_states(dfa))}") + println(io, prefix, styled"├─ Number of labels: {magenta:$(num_labels(dfa))}") + println(io, prefix, styled"├─ Initial state: {magenta:$(initial_state(dfa))}") + println( + io, + prefix, + styled"└─ Accepting states: {magenta:$(accepting_states(dfa))}", + ) +end \ No newline at end of file diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index 2461fd24..d03ff3c4 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -177,44 +177,35 @@ modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsPolytopic}) where {N} = IsFRM ### Pretty printing -function Base.show(io::IO, mime::MIME"text/plain", mdp::FactoredRMDP{N, M}) where {N, M} - println(io, styled"{code:FactoredRobustMarkovDecisionProcess}") - println(io, "├─ ", N, styled" state variables with cardinality: {magenta:$(state_variables(mdp))}") - println(io, "├─ ", M, styled" action 
variables with cardinality: {magenta:$(action_variables(mdp))}") +function Base.show(io::IO, mime::MIME"text/plain", mdp::FactoredRMDP) + showsystem(io, "", "", mdp) +end + +function showsystem(io::IO, first_prefix, prefix, mdp::FactoredRMDP{N, M}) where {N, M} + println(io, first_prefix, styled"{code:FactoredRobustMarkovDecisionProcess}") + println(io, prefix, "├─ ", N, styled" state variables with cardinality: {magenta:$(state_variables(mdp))}") + println(io, prefix, "├─ ", M, styled" action variables with cardinality: {magenta:$(action_variables(mdp))}") if initial_states(mdp) isa AllStates - println(io, "├─ ", styled"Initial states: {magenta:All states}") + println(io, prefix, "├─ ", styled"Initial states: {magenta:All states}") else - println(io, "├─ ", styled"Initial states: {magenta:$(initial_states(mdp))}") + println(io, prefix, "├─ ", styled"Initial states: {magenta:$(initial_states(mdp))}") end - println(io, "├─ ", styled"Transition marginals:") - prefix = "│ " + println(io, prefix, "├─ ", styled"Transition marginals:") + marginal_prefix = prefix * "│ " for (i, marginal) in enumerate(mdp.transition[1:end - 1]) - println(io, prefix, "├─ Marginal $i: ") - showmarginal(io, prefix * "│ ", marginal) + println(io, marginal_prefix, "├─ Marginal $i: ") + showmarginal(io, marginal_prefix * "│ ", marginal) end - println(io, prefix, "└─ Marginal $(length(mdp.transition)): ") - showmarginal(io, prefix * " ", mdp.transition[end]) + println(io, marginal_prefix, "└─ Marginal $(length(mdp.transition)): ") + showmarginal(io, marginal_prefix * " ", mdp.transition[end]) - showinferred(io, mdp) + showinferred(io, prefix, mdp) end -function showmarginal(io::IO, prefix, marginal::Marginal{<:IntervalAmbiguitySets{R, MR}}) where {R, MR <: AbstractMatrix} - println(io, prefix, styled"├─ Ambiguity set type: Interval (dense, {code:$MR})") - println(io, prefix, styled"└─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = $(action_variables(marginal))}") 
-end - -function showmarginal(io::IO, prefix, marginal::Marginal{<:IntervalAmbiguitySets{R, MR}}) where {R, MR <: AbstractSparseMatrix} - println(io, prefix, styled"├─ Ambiguity set type: Interval (sparse, {code:$MR})") - println(io, prefix, styled"├─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = $(action_variables(marginal))}") - num_transitions = nnz(ambiguity_sets(marginal).gap) - max_support = maximum(supportsize, ambiguity_sets(marginal)) - println(io, prefix, styled"└─ Transitions: {magenta: $num_transitions (max support: $max_support)}") -end - -function showinferred(io::IO, mdp::FactoredRMDP) - println(io, "└─", styled"{red:Inferred properties}") - prefix = " " +function showinferred(io::IO, prefix, mdp::FactoredRMDP) + println(io, prefix, "└─", styled"{red:Inferred properties}") + prefix = prefix * " " showmodeltype(io, prefix, mdp) println(io, prefix, "├─", styled"Number of states: {green:$(num_states(mdp))}") println(io, prefix, "├─", styled"Number of actions: {green:$(num_actions(mdp))}") @@ -244,28 +235,4 @@ end function showmodeltype(io::IO, prefix, ::IsRMDP) println(io, prefix, "├─", styled"Model type: {green:Robust MDP}") -end - -function showmcalgorithm(io::IO, prefix, ::RobustValueIteration) - println(io, prefix,"├─", styled"Default model checking algorithm: {green:Robust Value Iteration}") -end - -function showmcalgorithm(io::IO, prefix, _) - println(io, prefix,"├─", styled"Default model checking algorithm: {green:None}") -end - -function showbellmanalg(io::IO, prefix, ::IsIMDP,::OMaximization) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:O-Maximization}") -end - -function showbellmanalg(io::IO, prefix, ::IsFIMDP,::OMaximization) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Recursive O-Maximization}") -end - -function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::LPMcCormickRelaxation) - println(io, prefix, "└─", styled"Default Bellman operator 
algorithm: {green:Binary tree LP McCormick Relaxation}") -end - -function showbellmanalg(io::IO, prefix, _, _) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:None}") end \ No newline at end of file diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index 83602125..ef43ae2f 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -97,4 +97,13 @@ labelling_function(proc::ProductProcess) = proc.labelling_func state_variables(proc::ProductProcess) = (state_variables(markov_process(proc))..., num_states(automaton(proc))) source_shape(proc::ProductProcess) = (source_shape(markov_process(proc))..., num_states(automaton(proc))) action_variables(proc::ProductProcess) = action_variables(markov_process(proc)) -action_shape(proc::ProductProcess) = action_shape(markov_process(proc)) \ No newline at end of file +action_shape(proc::ProductProcess) = action_shape(markov_process(proc)) + +function showsystem(io::IO, prefix, mdp::ProductProcess{M, D, L}) where {M, D, L} + println(io, prefix, styled"{code:ProductProcess}") + println(io, prefix, "├─ Underlying process:") + showsystem(io, prefix * "├─ ", prefix * "│ ", markov_process(mdp)) + println(io, prefix, "├─ Automaton:") + showsystem(io, prefix * "│ ", automaton(mdp)) + println(io, prefix, styled"└─ Labelling type: {magenta:$(L)}") +end \ No newline at end of file diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 19ec0709..c1f7784c 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -199,6 +199,33 @@ function Base.iterate(p::IntervalAmbiguitySets, state) end Base.length(p::IntervalAmbiguitySets) = num_sets(p) +function showambiguitysets(io::IO, prefix, ::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix} + println(io, prefix, styled"└─ Ambiguity set type: Interval (dense, {code:$MR})") +end + +function showambiguitysets(io::IO, 
prefix, p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractSparseMatrix} + println(io, prefix, styled"├─ Ambiguity set type: Interval (sparse, {code:$MR})") + num_transitions = nnz(p.gap) + max_support = maximum(supportsize, p) + println(io, prefix, styled"└─ Transitions: {magenta: $num_transitions (max support: $max_support)}") +end + +function Base.show(io::IO, mime::MIME"text/plain", p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix} + println(io, styled"{code:IntervalAmbiguitySets}") + println(io, styled"├─ Storage type: {code:$MR}") + println(io, "├─ Number of target states: ", num_target(p)) + println(io, "└─ Number of ambiguity sets: ", num_sets(p)) +end + +function Base.show(io::IO, mime::MIME"text/plain", p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractSparseMatrix} + println(io, styled"{code:IntervalAmbiguitySets}") + println(io, styled"├─ Storage type: {code:$MR}") + println(io, "├─ Number of target states: ", num_target(p)) + println(io, "├─ Number of ambiguity sets: ", num_sets(p)) + println(io, "├─ Maximum support size: ", maximum(supportsize, p)) + println(io, "└─ Number of non-zeros: ", nnz(p.gap)) +end + struct IntervalAmbiguitySet{R, VR <: AbstractVector{R}} <: PolytopicAmbiguitySet lower::VR gap::VR diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 7ff2e499..d759c641 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -88,4 +88,14 @@ function sub2ind(p::Marginal, action::NTuple{M, T}, source::NTuple{N, T}) where j = p.linear_index[action..., source...] 
return T(j) +end + +function showmarginal(io::IO, prefix, marginal::Marginal) + println(io, prefix, styled"├─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = $(action_variables(marginal))}") + showambiguitysets(io, prefix, ambiguity_sets(marginal)) +end + +function Base.show(io::IO, mime::MIME"text/plain", marginal::Marginal) + println(io, styled"{code:Marginal}") + showmarginal(io, "", marginal) end \ No newline at end of file diff --git a/src/problem.jl b/src/problem.jl index e3e98714..e4743006 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -60,6 +60,17 @@ Return the strategy of a problem, if provided. """ strategy(prob::VerificationProblem) = prob.strategy +function Base.show(io::IO, mime::MIME"text/plain", prob::VerificationProblem) + println(io, styled"{code:VerificationProblem}") + showsystem(io, "├─ ", "│ ", system(prob)) + showspecification(io, "├─ ", "│ ", specification(prob)) + if !(prob.strategy isa NoStrategy) + showstrategy(io, "└─ ", " ", strategy(prob)) + else + println(io, "└─ ", styled"No strategy provided (selecting optimal actions at every step)") + end +end + # Solution struct VerificationSolution{R, MR <: AbstractArray{R}, D} value_function::MR @@ -176,3 +187,9 @@ num_iterations(s::ControlSynthesisSolution) = s.num_iterations Base.iterate(s::ControlSynthesisSolution, args...) = iterate((s.strategy, s.value_function, s.num_iterations, s.residual), args...) 
+ +function Base.show(io::IO, mime::MIME"text/plain", prob::ControlSynthesisProblem) + println(io, styled"{code:ControlSynthesisProblem}") + showsystem(io, "├─ ", "│ ", system(prob)) + showspecification(io, "└─ ", " ", specification(prob)) +end \ No newline at end of file diff --git a/src/specification.jl b/src/specification.jl index 893f1943..e0e68dfe 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -15,6 +15,8 @@ function checkmodelpropertycompatibility(prop, system) ) end +Base.show(io::IO, mime::MIME"text/plain", prop::Property) = showproperty(io, "", "", spec) + """ BasicProperty @@ -174,6 +176,12 @@ property. """ reach(prop::FiniteTimeDFAReachability) = prop.terminal_states +function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeDFAReachability) + println(io, first_prefix, styled"{code:FiniteTimeDFAReachability}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") +end + """ InfiniteTimeDFAReachability{R <: Real, VT <: Vector{<:Int32}} @@ -231,6 +239,12 @@ property. """ reach(prop::InfiniteTimeDFAReachability) = prop.terminal_states +function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeDFAReachability) + println(io, first_prefix, styled"{code:InfiniteTimeDFAReachability}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") +end + ## Reachability """ @@ -311,6 +325,12 @@ terminal states differ. 
""" reach(prop::FiniteTimeReachability) = prop.terminal_states +function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeReachability) + println(io, first_prefix, styled"{code:FiniteTimeReachability}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") +end + """ InfiniteTimeReachability{R <: Real, VT <: Vector{<:CartesianIndex}} @@ -369,6 +389,12 @@ terminal states differ. """ reach(prop::InfiniteTimeReachability) = prop.terminal_states +function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeReachability) + println(io, first_prefix, styled"{code:InfiniteTimeReachability}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") +end + """ ExactTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} @@ -434,6 +460,12 @@ terminal states differ. """ reach(prop::ExactTimeReachability) = prop.terminal_states +function showproperty(io::IO, first_prefix, prefix, spec::ExactTimeReachability) + println(io, first_prefix, styled"{code:ExactTimeReachability}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") +end + ## Reach-avoid """ @@ -448,6 +480,38 @@ function step_postprocess_value_function!(value_function, prop::AbstractReachAvo @inbounds value_function.current[avoid(prop)] .= 0.0 end +function checkstatebounds(states, system::IntervalMarkovProcess) + pns = state_variables(system) + for j in states + j = Tuple(j) + + if length(j) != length(pns) + throw(StateDimensionMismatch(j, length(pns))) + end + + if any(j .< 1) || any(j .> pns) + throw(InvalidStateError(j, pns)) + end + end +end + +checkstatebounds(states, system::ProductProcess) = + checkstatebounds(states, automaton(system)) + +function checkstatebounds(states, 
system::DeterministicAutomaton) + for state in states + if state < 1 || state > num_states(system) + throw(InvalidStateError(state, num_states(system))) + end + end +end + +function checkdisjoint(reach, avoid) + if !isdisjoint(reach, avoid) + throw(DomainError((reach, avoid), "reach and avoid sets are not disjoint")) + end +end + """ FiniteTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}}, T <: Integer} @@ -521,6 +585,13 @@ Return the set of states to avoid. """ avoid(prop::FiniteTimeReachAvoid) = prop.avoid +function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeReachAvoid) + println(io, first_prefix, styled"{code:FiniteTimeReachAvoid}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"├─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") +end + """ InfiniteTimeReachAvoid{R <: Real, VT <: AbstractVector{<:CartesianIndex}} @@ -589,6 +660,13 @@ Return the set of states to avoid. """ avoid(prop::InfiniteTimeReachAvoid) = prop.avoid +function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeReachAvoid) + println(io, first_prefix, styled"{code:InfiniteTimeReachAvoid}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") + println(io, prefix, styled"├─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") +end + """ ExactTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}}, T <: Integer} @@ -666,36 +744,11 @@ Return the set of states to avoid. 
""" avoid(prop::ExactTimeReachAvoid) = prop.avoid -function checkstatebounds(states, system::IntervalMarkovProcess) - pns = state_variables(system) - for j in states - j = Tuple(j) - - if length(j) != length(pns) - throw(StateDimensionMismatch(j, length(pns))) - end - - if any(j .< 1) || any(j .> pns) - throw(InvalidStateError(j, pns)) - end - end -end - -checkstatebounds(states, system::ProductProcess) = - checkstatebounds(states, automaton(system)) - -function checkstatebounds(states, system::DeterministicAutomaton) - for state in states - if state < 1 || state > num_states(system) - throw(InvalidStateError(state, num_states(system))) - end - end -end - -function checkdisjoint(reach, avoid) - if !isdisjoint(reach, avoid) - throw(DomainError((reach, avoid), "reach and avoid sets are not disjoint")) - end +function showproperty(io::IO, first_prefix, prefix, spec::ExactTimeReachAvoid) + println(io, first_prefix, styled"{code:ExactTimeReachAvoid}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"├─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") end ## Safety @@ -777,6 +830,12 @@ This is equivalent for [`terminal_states(prop::FiniteTimeSafety)`](@ref). """ avoid(prop::FiniteTimeSafety) = prop.avoid_states +function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeSafety) + println(io, first_prefix, styled"{code:FiniteTimeSafety}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") +end + """ InfiniteTimeSafety{R <: Real, VT <: Vector{<:CartesianIndex}} @@ -832,6 +891,12 @@ This is equivalent for [`terminal_states(prop::InfiniteTimeSafety)`](@ref). 
""" avoid(prop::InfiniteTimeSafety) = prop.avoid_states +function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeSafety) + println(io, first_prefix, styled"{code:InfiniteTimeSafety}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") +end + ## Reward """ @@ -922,6 +987,13 @@ Return the time horizon of a finite time reward optimization. """ time_horizon(prop::FiniteTimeReward) = prop.time_horizon +function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeReward) + println(io, first_prefix, styled"{code:FiniteTimeReward}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") + println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(spec))}") + println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(spec))), $(size(reward(spec)))}") +end + """ InfiniteTimeReward{R <: Real, AR <: AbstractArray{R}} @@ -984,6 +1056,13 @@ Return the convergence threshold of an infinite time reward optimization. """ convergence_eps(prop::InfiniteTimeReward) = prop.convergence_eps +function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeReward) + println(io, first_prefix, styled"{code:InfiniteTimeReward}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") + println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(spec))}") + println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(spec))), $(size(reward(spec)))}") +end + ## Hitting time """ AbstractHittingTime @@ -1067,6 +1146,12 @@ Return the convergence threshold of an expected exit time. 
""" convergence_eps(prop::ExpectedExitTime) = prop.convergence_eps +function showproperty(io::IO, first_prefix, prefix, spec::ExpectedExitTime) + println(io, first_prefix, styled"{code:ExpectedExitTime}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") +end + ## Problem """ @@ -1156,3 +1241,10 @@ Return the strategy mode of a specification. strategy_mode(spec::Specification) = spec.strategy ismaximize(spec::Specification) = ismaximize(strategy_mode(spec)) isminimize(spec::Specification) = isminimize(strategy_mode(spec)) + +function showspecification(io::IO, first_prefix, prefix, spec::Specification) + println(io, first_prefix, styled"{code:Specification}") + println(io, prefix, styled"├─ Satisfaction mode: {magenta:$(satisfaction_mode(spec))}") + println(io, prefix, styled"├─ Strategy mode: {magenta:$(strategy_mode(spec))}") + showproperty(io, prefix * "└─ Property: ", prefix * " ", system_property(spec)) +end \ No newline at end of file diff --git a/src/strategy.jl b/src/strategy.jl index 4880a193..d3042de1 100644 --- a/src/strategy.jl +++ b/src/strategy.jl @@ -47,6 +47,11 @@ function checkstrategy(strategy::AbstractArray, system::FactoredRMDP) end end +function showstrategy(io::IO, first_prefix, prefix, strategy::StationaryStrategy) + println(io, first_prefix, styled"{code:StationaryStrategy}") + println(io, prefix, styled"└─ Strategy shape: {magenta:$(size(strategy.strategy))}") +end + """ TimeVaryingStrategy @@ -64,3 +69,9 @@ function checkstrategy(strategy::TimeVaryingStrategy, system) checkstrategy(strategy_step, system) end end + +function showstrategy(io::IO, first_prefix, prefix, strategy::TimeVaryingStrategy) + println(io, first_prefix, styled"{code:TimeVaryingStrategy}") + println(io, prefix, styled"├─ Time length: {magenta:$(length(strategy.strategy))}") + println(io, prefix, styled"└─ Strategy shape: 
{magenta:$(size(strategy.strategy[1]))}") +end \ No newline at end of file diff --git a/src/workspace.jl b/src/workspace.jl index c83f94d0..9b081e3b 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -16,7 +16,7 @@ struct ProductWorkspace{W, MT <: AbstractArray} intermediate_values::MT end -function construct_workspace(proc::ProductProcess, alg; kwargs...) +function construct_workspace(proc::ProductProcess, alg=default_bellman_algorithm(proc); kwargs...) mp = markov_process(proc) underlying_workspace = construct_workspace(mp, alg; kwargs...) intermediate_values = arrayfactory(mp, valuetype(mp), state_variables(mp)) @@ -24,7 +24,7 @@ function construct_workspace(proc::ProductProcess, alg; kwargs...) return ProductWorkspace(underlying_workspace, intermediate_values) end -construct_workspace(mdp::FactoredRMDP, bellman_alg; kwargs...) = construct_workspace(mdp, modeltype(mdp), bellman_alg; kwargs...) +construct_workspace(mdp::FactoredRMDP, alg=default_bellman_algorithm(mdp); kwargs...) = construct_workspace(mdp, modeltype(mdp), alg; kwargs...) function construct_workspace( sys::FactoredRMDP, @@ -80,7 +80,7 @@ scratch(ws::ThreadedDenseIntervalOMaxWorkspace) = scratch(first(ws.thread_worksp function construct_workspace( prob::IntervalAmbiguitySets{R, MR}, - ::OMaximization; + ::OMaximization = default_bellman_algorithm(prob); threshold = 10, num_actions = 1, kwargs... ) where {R, MR <: AbstractMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold @@ -124,7 +124,7 @@ Base.getindex(ws::ThreadedSparseIntervalOMaxWorkspace, i) = ws.thread_workspaces function construct_workspace( prob::IntervalAmbiguitySets{R, MR}, - ::OMaximization; + ::OMaximization = default_bellman_algorithm(prob); threshold = 10, num_actions = 1, kwargs... 
diff --git a/test/base/bellman.jl b/test/base/bellman.jl index a5f85a90..d7a3b668 100644 --- a/test/base/bellman.jl +++ b/test/base/bellman.jl @@ -11,7 +11,7 @@ using IntervalMDP #### Maximization @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) + ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) IntervalMDP._bellman_helper!( @@ -53,7 +53,7 @@ using IntervalMDP #### Minimization @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) + ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) IntervalMDP._bellman_helper!( diff --git a/test/cuda/dense/bellman.jl b/test/cuda/dense/bellman.jl index 071de15e..a57d0f06 100644 --- a/test/cuda/dense/bellman.jl +++ b/test/cuda/dense/bellman.jl @@ -12,7 +12,7 @@ using IntervalMDP, CUDA #### Maximization @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) + ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) IntervalMDP._bellman_helper!( @@ -29,7 +29,7 @@ using IntervalMDP, CUDA #### Minimization @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) + ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) IntervalMDP._bellman_helper!( diff --git a/test/sparse/bellman.jl b/test/sparse/bellman.jl index eaf23363..ef93e885 100644 --- a/test/sparse/bellman.jl +++ b/test/sparse/bellman.jl @@ -18,7 +18,7 @@ using IntervalMDP, SparseArrays #### Maximization @testset "maximization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) + ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) IntervalMDP._bellman_helper!( @@ 
-60,7 +60,7 @@ using IntervalMDP, SparseArrays #### Minimization @testset "minimization" begin - ws = IntervalMDP.construct_workspace(prob, OMaximization()) + ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) IntervalMDP._bellman_helper!( From 8668f0fffa648966a2957fc17dd193dbbdcc51a1 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 13:58:44 +0200 Subject: [PATCH 16/71] Fix regression --- ext/cuda/bellman/dense.jl | 2 +- ext/cuda/workspace.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index 4f017626..b48af781 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -214,7 +214,7 @@ end end sync_warp() - jₐ += nwarps + jₐ += one(Int32) end # Find the best action diff --git a/ext/cuda/workspace.jl b/ext/cuda/workspace.jl index baf3f830..7c3bbf09 100644 --- a/ext/cuda/workspace.jl +++ b/ext/cuda/workspace.jl @@ -9,7 +9,7 @@ end IntervalMDP.construct_workspace( prob::IntervalAmbiguitySets{R, MR}, - ::OMaximization = default_bellman_algorithm(prob); + ::OMaximization = IntervalMDP.default_bellman_algorithm(prob); num_actions = 1, kwargs... ) where {R, MR <: AbstractGPUMatrix{R}} = CuDenseOMaxWorkspace(num_actions) @@ -29,7 +29,7 @@ end IntervalMDP.construct_workspace( prob::IntervalAmbiguitySets{R, MR}, - ::OMaximization = default_bellman_algorithm(prob); + ::OMaximization = IntervalMDP.default_bellman_algorithm(prob); num_actions = 1, kwargs... 
) where {R, MR <: AbstractCuSparseMatrix{R}} = CuSparseOMaxWorkspace(prob, num_actions) From 932f7b557175cef97e69871bf7e55163c794d3a8 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 21:12:29 +0200 Subject: [PATCH 17/71] Fix policy device conversion --- ext/IntervalMDPCudaExt.jl | 10 +++++----- ext/cuda/array.jl | 21 ++++++++++++--------- src/utils.jl | 4 ++++ test/cuda/dense/synthesis.jl | 6 +++--- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl index 328f01aa..02e25fc2 100644 --- a/ext/IntervalMDPCudaExt.jl +++ b/ext/IntervalMDPCudaExt.jl @@ -8,13 +8,13 @@ using GPUArrays: AbstractGPUArray, AbstractGPUVector, AbstractGPUMatrix using IntervalMDP, LinearAlgebra +# Opinionated conversion to GPU with preserved value types and Int32 indices +IntervalMDP.cu(obj) = adapt(IntervalMDP.CuModelAdaptor{IntervalMDP.valuetype(obj)}, obj) +IntervalMDP.cpu(obj) = adapt(IntervalMDP.CpuModelAdaptor{IntervalMDP.valuetype(obj)}, obj) + Adapt.@adapt_structure Marginal Adapt.@adapt_structure StationaryStrategy -Adapt.@adapt_structure TimeVaryingStrategy - -# Opinionated conversion to GPU with preserved value types and Int32 indices -IntervalMDP.cu(model) = adapt(IntervalMDP.CuModelAdaptor{IntervalMDP.valuetype(model)}, model) -IntervalMDP.cpu(model) = adapt(IntervalMDP.CpuModelAdaptor{IntervalMDP.valuetype(model)}, model) +Adapt.adapt_structure(to, strategy::TimeVaryingStrategy) = TimeVaryingStrategy([adapt(to, s) for s in strategy.strategy]) function Adapt.adapt_structure( T::Type{<:IntervalMDP.CuModelAdaptor}, diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index 5d5ebd44..0aa6816a 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -19,23 +19,26 @@ SparseArrays.nzrange(S::CuSparseMatrixCSC, col::Integer) = CUDA.@allowscalar(S.colPtr[col]):(CUDA.@allowscalar(S.colPtr[col + 1]) - 1) Adapt.adapt_storage( - ::Type{IntervalMDP.CuModelAdaptor{Tv}}, + 
::Type{<:IntervalMDP.CuModelAdaptor{Tv}}, M::SparseArrays.FixedSparseCSC, ) where {Tv} = CuSparseMatrixCSC{Tv, Int32}(M) -Adapt.adapt_storage(::Type{IntervalMDP.CuModelAdaptor{Tv}}, M::SparseMatrixCSC) where {Tv} = - CuSparseMatrixCSC{Tv, Int32}(M) +Adapt.adapt_storage(::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, M::SparseMatrixCSC{Tv2}) where {Tv1, Tv2} = + CuSparseMatrixCSC{Tv1, Int32}(M) -Adapt.adapt_storage(::Type{IntervalMDP.CuModelAdaptor{Tv}}, x::AbstractArray) where {Tv} = - adapt(CuArray{Tv}, x) +Adapt.adapt_storage(::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, x::AbstractArray{Tv2}) where {Tv1, Tv2} = + adapt(CuArray{Tv1}, x) + +Adapt.adapt_storage(::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, x::AbstractArray{NTuple{N, T}}) where {Tv1, N, T <: Integer} = + adapt(CuArray{NTuple{N, T}}, x) Adapt.adapt_storage( ::Type{IntervalMDP.CpuModelAdaptor{Tv}}, M::CuSparseMatrixCSC, ) where {Tv} = SparseMatrixCSC{Tv, Int32}(M) -Adapt.adapt_storage(::Type{IntervalMDP.CpuModelAdaptor{Tv}}, x::CuArray{Tv}) where {Tv} = - adapt(Array{Tv}, x) +Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{Tv2}) where {Tv1, Tv2} = + adapt(Array{Tv1}, x) -Adapt.adapt_storage(::Type{IntervalMDP.CpuModelAdaptor{Tv}}, x::CuArray{<:Integer}) where {Tv} = - adapt(Array{Int32}, x) +Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{NTuple{N, T}}) where {Tv1, N, T <: Integer} = + adapt(Array{NTuple{N, T}}, x) diff --git a/src/utils.jl b/src/utils.jl index 76a9f160..ef047906 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -26,6 +26,10 @@ valuetype(marginal::Marginal) = valuetype(ambiguity_sets(marginal)) valuetype(::IntervalAmbiguitySets{R}) where {R} = R valuetype(::AbstractArray{R}) where {R} = R +valuetype(::TimeVaryingStrategy{N, <:AbstractArray{NTuple{N, T}}}) where {N, T} = T +valuetype(::StationaryStrategy{N, <:AbstractArray{NTuple{N, T}}}) where {N, T} = T +valuetype(::NoStrategy) = nothing + valuetype(::Property) = nothing 
valuetype(::FiniteTimeReward{R}) where {R} = R valuetype(::InfiniteTimeReward{R}) where {R} = R \ No newline at end of file diff --git a/test/cuda/dense/synthesis.jl b/test/cuda/dense/synthesis.jl index b62b666a..23dee47e 100644 --- a/test/cuda/dense/synthesis.jl +++ b/test/cuda/dense/synthesis.jl @@ -67,7 +67,7 @@ for k in 1:time_length(policy) end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP -problem = VerificationProblem(mdp, spec, policy) +problem = VerificationProblem(mdp, spec, IntervalMDP.cu(policy)) V_mc, k, res = solve(problem) @test V ≈ V_mc @@ -86,7 +86,7 @@ for k in 1:time_length(policy) end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP -problem = VerificationProblem(mdp, spec, policy) +problem = VerificationProblem(mdp, spec, IntervalMDP.cu(policy)) V_mc, k, res = solve(problem) @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_mc) atol=1e-5 @@ -102,7 +102,7 @@ policy = IntervalMDP.cpu(policy) # Convert to CPU for testing @test policy[1] == [(1,), (2,), (1,)] # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP -problem = VerificationProblem(mdp, spec, policy) +problem = VerificationProblem(mdp, spec, IntervalMDP.cu(policy)) V_mc, k, res = solve(problem) @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_mc) atol=1e-5 From 91d1e6bcdd0e8346d746e62a6d0b73064013d9e7 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 21:34:31 +0200 Subject: [PATCH 18/71] Fix strategy synthesis for CUDA --- ext/cuda/strategy.jl | 26 +++++++++++++------------- test/cuda/dense/synthesis.jl | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ext/cuda/strategy.jl b/ext/cuda/strategy.jl index 2e17ac0d..d5bac99d 100644 --- a/ext/cuda/strategy.jl +++ b/ext/cuda/strategy.jl @@ -47,7 +47,7 @@ Base.@propagate_inbounds 
Base.getindex(cache::GivenStrategyActiveCache, j) = action_min, action_neutral = action_reduce[1], action_reduce[3] warp_aligned_length = kernel_nextwarp(length(values)) - @inbounds opt_val = action_neutral + opt_val = action_neutral s = lane @inbounds while s <= warp_aligned_length @@ -66,10 +66,10 @@ Base.@propagate_inbounds Base.getindex(cache::GivenStrategyActiveCache, j) = end @inline function extract_strategy_warp!( - cache::TimeVaryingStrategyActiveCache, + cache::TimeVaryingStrategyActiveCache{1, <:AbstractVector{Tuple{Int32}}}, values::AbstractVector{Tv}, V, - j, + jₛ, action_reduce, lane, ) where {Tv} @@ -77,14 +77,14 @@ end action_lt, action_neutral = action_reduce[2], action_reduce[3] warp_aligned_length = kernel_nextwarp(length(values)) - opt_val, opt_idx = action_neutral, 1 + opt_val, opt_idx = action_neutral, one(Int32) s = lane @inbounds while s <= warp_aligned_length new_val, new_idx = if s <= length(values) values[s], s else - action_neutral, 1 + action_neutral, one(Int32) end opt_val, opt_idx = argop(action_lt, opt_val, opt_idx, new_val, new_idx) @@ -94,17 +94,17 @@ end opt_val, opt_idx = argmin_warp(action_lt, opt_val, opt_idx) if lane == 1 - @inbounds cache.cur_strategy[j] = opt_idx + @inbounds cache.cur_strategy[jₛ] = (opt_idx,) end return opt_val end @inline function extract_strategy_warp!( - cache::StationaryStrategyActiveCache, + cache::StationaryStrategyActiveCache{1, <:AbstractVector{Tuple{Int32}}}, values::AbstractVector{Tv}, V, - j, + jₛ, action_reduce, lane, ) where {Tv} @@ -112,10 +112,10 @@ end action_lt, action_neutral = action_reduce[2], action_reduce[3] warp_aligned_length = kernel_nextwarp(length(values)) - opt_val, opt_idx = if iszero(cache.strategy[j]) - action_neutral, 1 + opt_val, opt_idx = if iszero(cache.strategy[jₛ][1]) + action_neutral, one(Int32) else - V[j], cache.strategy[j] + V[jₛ], Int32(cache.strategy[jₛ][1]) end s = lane @@ -123,7 +123,7 @@ end new_val, new_idx = if s <= length(values) values[s], s else - 
action_neutral, 1 + action_neutral, one(Int32) end opt_val, opt_idx = argop(action_lt, opt_val, opt_idx, new_val, new_idx) @@ -133,7 +133,7 @@ end opt_val, opt_idx = argmin_warp(action_lt, opt_val, opt_idx) if lane == 1 - @inbounds cache.strategy[j] = opt_idx + @inbounds cache.strategy[jₛ] = (opt_idx,) end return opt_val diff --git a/test/cuda/dense/synthesis.jl b/test/cuda/dense/synthesis.jl index 23dee47e..7f2d79c2 100644 --- a/test/cuda/dense/synthesis.jl +++ b/test/cuda/dense/synthesis.jl @@ -72,7 +72,7 @@ V_mc, k, res = solve(problem) @test V ≈ V_mc # Finite time reward -prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) +prop = IntervalMDP.cu(FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10)) spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) From 430d2163612b23497447f31e34656abe6c2d60df Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 21:34:42 +0200 Subject: [PATCH 19/71] Fix strategy validity check for CUDA --- src/strategy.jl | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/strategy.jl b/src/strategy.jl index d3042de1..0df6e091 100644 --- a/src/strategy.jl +++ b/src/strategy.jl @@ -36,14 +36,27 @@ function checkstrategy(strategy::AbstractArray, system::FactoredRMDP) ) end - for jₛ in CartesianIndices(source_shape(system)) - if !all(1 .<= strategy[jₛ] .<= action_shape(system)) - throw( - DomainError( - "The strategy includes at least one invalid action (less than 1 or greater than num_actions for the state).", - ), - ) + as = action_shape(system) + invalid = any(strategy) do s + for i in eachindex(s) + if s[i] < 1 + return true + end + + if s[i] > as[i] + return true + end end + + return false + end + + if invalid + throw( + DomainError( + "The strategy includes at least one invalid action (less than 1 or greater than num_actions for some action variable).", + ), + ) end end From 
b1c1c6e1c4ef3e4d47dbb5c501030cbf9eda5913 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 15 Sep 2025 21:34:54 +0200 Subject: [PATCH 20/71] Fix Base.show for Property --- src/specification.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/specification.jl b/src/specification.jl index e0e68dfe..ab12ac16 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -15,7 +15,7 @@ function checkmodelpropertycompatibility(prop, system) ) end -Base.show(io::IO, mime::MIME"text/plain", prop::Property) = showproperty(io, "", "", spec) +Base.show(io::IO, mime::MIME"text/plain", prop::Property) = showproperty(io, "", "", prop) """ BasicProperty From d5cadd8a0dd91f37bc45345789d54a363126a205 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 17 Sep 2025 21:03:06 +0200 Subject: [PATCH 21/71] Add vertex enumeration bellman update algorithm --- src/IntervalMDP.jl | 2 +- src/algorithms.jl | 5 + src/bellman.jl | 132 ++++++++- src/probabilities/IntervalAmbiguitySets.jl | 33 ++- src/probabilities/probabilities.jl | 2 + src/utils.jl | 1 + src/workspace.jl | 42 +++ test/base/factored.jl | 303 +++++++++++++++------ 8 files changed, 415 insertions(+), 105 deletions(-) diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 9cf04a64..1b56343c 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -52,7 +52,7 @@ public cu, cpu ### Solving include("algorithms.jl") -export OMaximization, LPMcCormickRelaxation +export OMaximization, LPMcCormickRelaxation, VertexEnumeration export RobustValueIteration include("utils.jl") diff --git a/src/algorithms.jl b/src/algorithms.jl index 5834216f..aacaa922 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -3,6 +3,7 @@ struct OMaximization <: BellmanAlgorithm end Base.@kwdef struct LPMcCormickRelaxation{O} <: BellmanAlgorithm lp_optimizer::O = HiGHS.Optimizer end +struct VertexEnumeration <: BellmanAlgorithm end default_bellman_algorithm(pp::ProductProcess) = 
default_bellman_algorithm(markov_process(pp)) default_bellman_algorithm(mdp::FactoredRMDP) = default_bellman_algorithm(mdp, modeltype(mdp)) @@ -22,6 +23,10 @@ function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::LPMcCormickRelaxation) println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Binary tree LP McCormick Relaxation}") end +function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::VertexEnumeration) + println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Vertex Enumeration}") +end + function showbellmanalg(io::IO, prefix, _, ::BellmanAlgorithm) println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:None}") end diff --git a/src/bellman.jl b/src/bellman.jl index 5a4d7ef8..01037f05 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -238,9 +238,9 @@ function localize_strategy_cache(strategy_cache::ActiveGivenStrategyCache, dfa_s ) end -###################################################### -# Bellman operator for IntervalMarkovDecisionProcess # -###################################################### +########################################################################### +# O-Maximization-based Bellman operator for IntervalMarkovDecisionProcess # +########################################################################### # Non-threaded function _bellman_helper!( @@ -426,9 +426,9 @@ Base.@propagate_inbounds function gap_value( return res end -################################################## -# McCormick relaxation-based Bellman over fRMDPs # -################################################## +########################################################## +# McCormick relaxation-based Bellman operator for fIMDPs # +########################################################## # Non-threaded function _bellman_helper!( @@ -595,9 +595,9 @@ function mccormick_branch(model, ambiguity_sets) end -################################################## -# O-Max-based Bellman operator for Factored IMDP # 
-################################################## +#################################################### +# O-Maximization-based Bellman operator for fIMDPs # +#################################################### function _bellman_helper!( workspace::FactoredIntervalOMaxWorkspace, strategy_cache::AbstractStrategyCache, @@ -753,3 +753,117 @@ Base.@propagate_inbounds function orthogonal_inner_bellman!( return dot(V, lower(ambiguity_set)) + gap_value(Vp_workspace, budget) end + + +########################################################## +# Vertex enumeration-based Bellman operator for fIMDPs # +########################################################## + +# Non-threaded +function _bellman_helper!( + workspace::FactoredVertexIteratorWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + model; + upper_bound = false, + maximize = true, +) + for jₛ in CartesianIndices(source_shape(model)) + state_bellman!( + workspace, + strategy_cache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, + ) + end + + return Vres +end + +# Threaded +function _bellman_helper!( + workspace::ThreadedFactoredVertexIteratorWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + model; + upper_bound = false, + maximize = true, +) + @threadstid tid for jₛ in CartesianIndices(source_shape(model)) + @inbounds ws = workspace[tid] + state_bellman!( + ws, + strategy_cache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, + ) + end + + return Vres +end + +function state_bellman!( + workspace::FactoredVertexIteratorWorkspace, + strategy_cache::OptimizingStrategyCache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, +) + @inbounds begin + for jₐ in CartesianIndices(action_shape(model)) + ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) + workspace.actions[jₐ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + end + + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + end +end + +function state_bellman!( + 
workspace::FactoredVertexIteratorWorkspace, + strategy_cache::NonOptimizingStrategyCache, + Vres, + V, + model, + jₛ, + upper_bound, + maximize, +) + @inbounds begin + jₐ = CartesianIndex(strategy_cache[jₛ]) + ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) + Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + end +end + +Base.@propagate_inbounds function state_action_bellman( + workspace::FactoredVertexIteratorWorkspace, + V::AbstractArray{R}, + ambiguity_sets, + upper_bound, +) where {R} + iterators = vertex_generator.(ambiguity_sets, workspace.result_vectors) + + optval = upper_bound ? typemin(R) : typemax(R) + optfunc = upper_bound ? max : min + + for marginal_vertices in Iterators.product(iterators...) + v = sum(V[I] * prod(r -> marginal_vertices[r][I[r]], eachindex(ambiguity_sets)) for I in CartesianIndices(num_target.(ambiguity_sets))) + optval = optfunc(optval, v) + end + + return optval +end \ No newline at end of file diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index c1f7784c..718f82f1 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -251,15 +251,22 @@ support(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = rowvals(p supportsize(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) # Vertex iterator for IntervalAmbiguitySet -struct IntervalAmbiguitySetVertexIterator{R, VR <: AbstractVector{R}, P <: Permutations} +struct IntervalAmbiguitySetVertexIterator{R, VR <: AbstractVector{R}, P <: Permutations} <: VertexIterator set::IntervalAmbiguitySet{R, VR} perm::P + result::Vector{R} # Preallocated result vector end -function IntervalAmbiguitySetVertexIterator(set::IntervalAmbiguitySet) +function IntervalAmbiguitySetVertexIterator(set::IntervalAmbiguitySet, result::Vector) perm = permutations(support(set)) - return IntervalAmbiguitySetVertexIterator(set, perm) + return 
IntervalAmbiguitySetVertexIterator(set, perm, result) +end + +function IntervalAmbiguitySetVertexIterator(set::IntervalAmbiguitySet) + v = Vector{valuetype(set)}(undef, num_target(set)) + return IntervalAmbiguitySetVertexIterator(set, v) end + Base.IteratorEltype(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasEltype() Base.eltype(::IntervalAmbiguitySetVertexIterator{R}) where {R} = Vector{R} Base.IteratorSize(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasLength() @@ -274,9 +281,13 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}) where {R, V (permutation, state) = res - v = copy(lower(it.set)) - budget = 1.0 - sum(v) + v = it.result + copyto!(v, lower(it.set)) + + # v = copy(lower(it.set)) + budget = one(R) - sum(v) for i in permutation + i = support(it.set)[i] if budget <= gap(it.set, i) v[i] += budget break @@ -297,9 +308,14 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher end (permutation, state) = res - v = copy(lower(it.set)) - budget = 1.0 - sum(v) + + v = it.result + copyto!(v, lower(it.set)) + + # v = copy(lower(it.set)) + budget = one(R) - sum(v) for i in permutation + i = support(it.set)[i] if budget <= gap(it.set, i) v[i] += budget break @@ -313,4 +329,5 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher end vertex_generator(p::IntervalAmbiguitySet) = IntervalAmbiguitySetVertexIterator(p) -vertices(p::IntervalAmbiguitySet) = collect(vertex_generator(p)) \ No newline at end of file +vertex_generator(p::IntervalAmbiguitySet, result::Vector) = IntervalAmbiguitySetVertexIterator(p, result) +vertices(p::IntervalAmbiguitySet) = map(copy, vertex_generator(p)) \ No newline at end of file diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index 7360a9f9..9bf58897 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -12,6 +12,8 @@ ispolytopic(::PolytopicAmbiguitySets) = IsPolytopic() 
abstract type AbstractAmbiguitySet end abstract type PolytopicAmbiguitySet <: AbstractAmbiguitySet end +abstract type VertexIterator end + """ num_sets(ambiguity_sets::AbstractAmbiguitySets) diff --git a/src/utils.jl b/src/utils.jl index ef047906..baa91405 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -24,6 +24,7 @@ valuetype(mp::ProductProcess) = valuetype(markov_process(mp)) valuetype(mp::FactoredRMDP) = promote_type(valuetype.(marginals(mp))...) valuetype(marginal::Marginal) = valuetype(ambiguity_sets(marginal)) valuetype(::IntervalAmbiguitySets{R}) where {R} = R +valuetype(::IntervalAmbiguitySet{R}) where {R} = R valuetype(::AbstractArray{R}) where {R} = R valuetype(::TimeVaryingStrategy{N, <:AbstractArray{NTuple{N, T}}}) where {N, T} = T diff --git a/src/workspace.jl b/src/workspace.jl index 9b081e3b..aec37ef5 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -227,4 +227,46 @@ function construct_workspace( else return ThreadedFactoredIntervalOMaxWorkspace(sys) end +end + +# Factored vertex iterator workspace +struct FactoredVertexIteratorWorkspace{N, T, AT <: AbstractArray{T}} + result_vectors::NTuple{N, Vector{T}} + actions::AT +end + +function FactoredVertexIteratorWorkspace(sys::FactoredRMDP) + N = length(marginals(sys)) + R = valuetype(sys) + + result_vectors = ntuple(r -> Vector{R}(undef, state_variables(sys, r)), N) + actions = Array{valuetype(sys)}(undef, action_shape(sys)) + + return FactoredVertexIteratorWorkspace(result_vectors, actions) +end + +struct ThreadedFactoredVertexIteratorWorkspace{N, T, AT <: AbstractArray{T}} + thread_workspaces::Vector{FactoredVertexIteratorWorkspace{N, T, AT}} +end + +function ThreadedFactoredVertexIteratorWorkspace(sys::FactoredRMDP) + nthreads = Threads.nthreads() + thread_workspaces = [FactoredVertexIteratorWorkspace(sys) for _ in 1:nthreads] + return ThreadedFactoredVertexIteratorWorkspace(thread_workspaces) +end + +Base.getindex(ws::ThreadedFactoredVertexIteratorWorkspace, i) = ws.thread_workspaces[i] + 
+function construct_workspace( + sys::FactoredRMDP, + ::Union{IsFIMDP, IsIMDP}, + ::VertexEnumeration; + threshold = 10, + kwargs... +) + if Threads.nthreads() == 1 || num_states(sys) <= threshold + return FactoredVertexIteratorWorkspace(sys) + else + return ThreadedFactoredVertexIteratorWorkspace(sys) + end end \ No newline at end of file diff --git a/test/base/factored.jl b/test/base/factored.jl index cc0afbd4..af8bb2c0 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -20,6 +20,42 @@ using Random: MersenneTwister V = N[1, 2, 3] + @testset "vertices" begin + verts = IntervalMDP.vertices(ambiguity_sets[1]) + @test length(verts) <= 6 # = number of permutations of 3 elements, may be less due to uniqueness + + expected_verts = N[ + 5//10 3//10 2//10 + 5//10 1//10 4//10 + 2//10 6//10 2//10 + 0 6//10 4//10 + 2//10 1//10 7//10 + 0 3//10 7//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + + verts = IntervalMDP.vertices(ambiguity_sets[2]) + @test length(verts) <= 6 # = number of permutations of 3 elements modulo + + expected_verts = N[ # duplicates due to budget < gap for all elements + 6//10 3//10 1//10 + 5//10 4//10 1//10 + 5//10 3//10 2//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + + verts = IntervalMDP.vertices(ambiguity_sets[3]) + @test length(verts) <= 6 # = number of permutations of 3 elements + + expected_verts = N[ # Only one vertex since sum(lower) = 1 + 2//10 3//10 5//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + end + @testset "maximization" begin Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent @@ -61,6 +97,45 @@ using Random: MersenneTwister upper_bound = true, ) 
@test Vres ≈ Vexpected + + ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected end @testset "minimization" begin @@ -104,6 +179,45 @@ using Random: MersenneTwister upper_bound = false, ) @test Vres ≈ Vexpected + + ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected end end @@ -112,7 +226,6 @@ using Random: MersenneTwister action_indices = (1,) state_vars = (2, 3) action_vars = (1,) - jₐ = CartesianIndex(1) marginal1 = Marginal(IntervalAmbiguitySets(; lower = N[ @@ -144,20 +257,26 @@ using Random: 
MersenneTwister 3 13 18 12 16 8 ] - eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) #### Maximization @testset "maximization" begin - V_vertex = [ - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + ) + @test V_vertex ≈ N[ + 1076//75 4279//300 167//15 + 11107//900 4123//300 121//9 + ] + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) @@ -247,15 +366,22 @@ using Random: MersenneTwister #### Minimization @testset "minimization" begin - V_vertex = [ - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + ) + + @test V_vertex ≈ N[ + 4399//450 41//5 488//45 + 1033//100 543//50 361//36 + ] ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -379,21 +505,21 @@ using Random: MersenneTwister 3 13 18 12 16 8 ] - eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) #### Maximization @testset "max/max" begin - V_vertex = [ - maximum( - 
maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + maximize = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -489,17 +615,18 @@ using Random: MersenneTwister end @testset "min/max" begin - V_vertex = [ - minimum( - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + maximize = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -596,17 +723,18 @@ using Random: MersenneTwister #### Minimization @testset "min/min" begin - V_vertex = [ - minimum( - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + maximize = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -702,17 +830,18 @@ using Random: MersenneTwister end @testset "max/min" begin - V_vertex = [ - maximum( - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + maximize = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -813,7 +942,6 @@ using Random: MersenneTwister action_indices = (1,) state_vars = (3, 3, 3) action_vars = (1,) - jₐ = CartesianIndex(1) marginal1 = Marginal(IntervalAmbiguitySets(; lower = N[ @@ -886,20 +1014,20 @@ using Random: MersenneTwister 2, ] V = reshape(V, 3, 3, 3) - eval_vertices(p, q, r) = sum(V[I] * p[I[1]] * q[I[2]] * r[I[3]] for I in CartesianIndices(state_vars)) #### Maximization @testset "maximization" begin - V_vertex = [ - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = 
zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -990,16 +1118,17 @@ using Random: MersenneTwister #### Minimization @testset "minimization" begin - V_vertex = [ - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) From ee7a479be3103fb64bcc456877eb2b87e45e7f06 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 12:30:47 +0200 Subject: [PATCH 22/71] Optimize vertex enumeration --- Project.toml | 2 - src/IntervalMDP.jl | 2 +- src/probabilities/IntervalAmbiguitySets.jl | 72 ++++++++++++++-------- test/base/factored.jl | 60 +++++++++--------- 4 files changed, 77 insertions(+), 59 deletions(-) diff --git a/Project.toml b/Project.toml index 6c7236e1..33c6b2b7 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ authors = ["Frederik Baymler Mathiesen and contributors"] version = "0.6.0" [deps] -Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" @@ -26,7 +25,6 @@ IntervalMDPCudaExt = ["Adapt", "CUDA", "GPUArrays", "LLVM"] [compat] Adapt = "4" CUDA = "5.1" -Combinatorics = "1.0.3" 
CommonSolve = "0.2.4" GPUArrays = "10, 11" HiGHS = "1.19.0" diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 1b56343c..6af2d757 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -8,7 +8,6 @@ export solve # Import necessary libraries using LinearAlgebra, SparseArrays using JuMP, HiGHS -using Combinatorics: permutations, Permutations using StyledStrings ### Utilities @@ -54,6 +53,7 @@ public cu, cpu include("algorithms.jl") export OMaximization, LPMcCormickRelaxation, VertexEnumeration export RobustValueIteration +export default_algorithm, default_bellman_algorithm, bellman_algorithm include("utils.jl") include("threading.jl") diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 718f82f1..1ca0739f 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -251,17 +251,11 @@ support(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = rowvals(p supportsize(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) # Vertex iterator for IntervalAmbiguitySet -struct IntervalAmbiguitySetVertexIterator{R, VR <: AbstractVector{R}, P <: Permutations} <: VertexIterator +struct IntervalAmbiguitySetVertexIterator{R, VR <: AbstractVector{R}} <: VertexIterator set::IntervalAmbiguitySet{R, VR} - perm::P result::Vector{R} # Preallocated result vector end -function IntervalAmbiguitySetVertexIterator(set::IntervalAmbiguitySet, result::Vector) - perm = permutations(support(set)) - return IntervalAmbiguitySetVertexIterator(set, perm, result) -end - function IntervalAmbiguitySetVertexIterator(set::IntervalAmbiguitySet) v = Vector{valuetype(set)}(undef, num_target(set)) return IntervalAmbiguitySetVertexIterator(set, v) @@ -269,27 +263,21 @@ end Base.IteratorEltype(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasEltype() Base.eltype(::IntervalAmbiguitySetVertexIterator{R}) where {R} = Vector{R} 
-Base.IteratorSize(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasLength() -Base.length(it::IntervalAmbiguitySetVertexIterator) = length(it.perm) +Base.IteratorSize(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.SizeUnknown() function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}) where {R, VR <: AbstractVector{R}} - res = iterate(it.perm) - - if isnothing(res) - throw(ArgumentError("The iterator is empty.")) - end - - (permutation, state) = res + permutation = collect(1:length(support(it.set))) v = it.result copyto!(v, lower(it.set)) - - # v = copy(lower(it.set)) budget = one(R) - sum(v) - for i in permutation + + break_idx = 0 + for (j, i) in enumerate(permutation) i = support(it.set)[i] if budget <= gap(it.set, i) v[i] += budget + break_idx = j break else v[i] += gap(it.set, i) @@ -297,27 +285,57 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}) where {R, V end end - return v, state + return v, (permutation, break_idx) end function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) where {R, VR <: AbstractVector{R}} - res = iterate(it.perm, state) + (permutation, last_break_idx) = state + + # Skip permutations that would lead to the same vertex + # based on the prefix 1:last_break_idx + break_j = nothing + for j in last_break_idx:-1:1 + # Find smallest permutation[k] in permutation[j+1:end] where permutation[j] < permutation[k] + next_in_suffix = nothing + for k in j+1:length(permutation) + if permutation[k] > permutation[j] + if isnothing(next_in_suffix) || permutation[k] < permutation[next_in_suffix] + next_in_suffix = k + end + end + end - if isnothing(res) + if isnothing(next_in_suffix) # No such k exists, continue to next j + continue + end + + # Swap + permutation[j], permutation[next_in_suffix] = permutation[next_in_suffix], permutation[j] + break_j = j + break + end + + if isnothing(break_j) return nothing end - (permutation, state) = res + sort!(@view(permutation[break_j+1:end])) + # Now 
compute the vertex for this new permutation v = it.result copyto!(v, lower(it.set)) - - # v = copy(lower(it.set)) budget = one(R) - sum(v) - for i in permutation + + if iszero(budget) + return nothing + end + + break_idx = 0 + for (j, i) in enumerate(permutation) i = support(it.set)[i] if budget <= gap(it.set, i) v[i] += budget + break_idx = j break else v[i] += gap(it.set, i) @@ -325,7 +343,7 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher end end - return v, state + return v, (permutation, break_idx) end vertex_generator(p::IntervalAmbiguitySet) = IntervalAmbiguitySetVertexIterator(p) diff --git a/test/base/factored.jl b/test/base/factored.jl index af8bb2c0..61efe276 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -1218,7 +1218,7 @@ using Random: MersenneTwister end end - @testset for alg in [RobustValueIteration(LPMcCormickRelaxation()), RobustValueIteration(OMaximization())] + @testset for alg in [RobustValueIteration(LPMcCormickRelaxation()), RobustValueIteration(OMaximization()), RobustValueIteration(VertexEnumeration())] @testset "implicit sink state" begin @testset "first dimension" begin state_indices = (1, 2, 3) @@ -1566,39 +1566,41 @@ using Random: MersenneTwister @test all(V_ortho .<= one(N)) # Test against the naive construction - prob_lower_simple = zeros(N, 81, 81) - prob_upper_simple = zeros(N, 81, 81) - - lin = LinearIndices((3, 3, 3, 3)) - act_idx = CartesianIndex(1) - for I in CartesianIndices((3, 3, 3, 3)) - for J in CartesianIndices((3, 3, 3, 3)) - marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) - - prob_lower_simple[lin[J], lin[I]] = prod( - lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) - - prob_upper_simple[lin[J], lin[I]] = prod( - upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) + if !isa(bellman_algorithm(alg), VertexEnumeration) + prob_lower_simple = zeros(N, 81, 81) + prob_upper_simple = zeros(N, 81, 81) + + lin = LinearIndices((3, 3, 3, 
3)) + act_idx = CartesianIndex(1) + for I in CartesianIndices((3, 3, 3, 3)) + for J in CartesianIndices((3, 3, 3, 3)) + marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) + + prob_lower_simple[lin[J], lin[I]] = prod( + lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) + + prob_upper_simple[lin[J], lin[I]] = prod( + upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 + ) + end end - end - ambiguity_set = IntervalAmbiguitySets(; - lower = prob_lower_simple, - upper = prob_upper_simple, - ) + ambiguity_set = IntervalAmbiguitySets(; + lower = prob_lower_simple, + upper = prob_upper_simple, + ) - imc = IntervalMarkovChain(ambiguity_set) + imc = IntervalMarkovChain(ambiguity_set) - prop = FiniteTimeReachability([81], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(imc, spec) + prop = FiniteTimeReachability([81], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(imc, spec) - V_direct, it_direct, res_direct = solve(prob, alg) - @test V_direct[81] ≈ one(N) - @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) + V_direct, it_direct, res_direct = solve(prob, alg) + @test V_direct[81] ≈ one(N) + @test all(V_ortho .≥ reshape(V_direct, 3, 3, 3, 3)) + end end @testset "synthesis" begin From 21b89b8edade578b8f882f18d59dd3a10a8ffe80 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 13:21:23 +0200 Subject: [PATCH 23/71] Test sparse vertex enumeration --- test/base/factored.jl | 2 +- test/sparse/factored.jl | 975 +++++++++++++++++++++++----------------- 2 files changed, 552 insertions(+), 425 deletions(-) diff --git a/test/base/factored.jl b/test/base/factored.jl index 61efe276..a9e601b8 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -36,7 +36,7 @@ using Random: MersenneTwister @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) verts = IntervalMDP.vertices(ambiguity_sets[2]) - @test length(verts) <= 6 # = number 
of permutations of 3 elements modulo + @test length(verts) <= 6 # = number of permutations of 3 elements expected_verts = N[ # duplicates due to budget < gap for all elements 6//10 3//10 1//10 diff --git a/test/sparse/factored.jl b/test/sparse/factored.jl index f7649676..b88bd85e 100644 --- a/test/sparse/factored.jl +++ b/test/sparse/factored.jl @@ -7,11 +7,11 @@ using Random: MersenneTwister ambiguity_sets = IntervalAmbiguitySets(; lower = sparse(N[ 0 5//10 2//10 - 1//10 0 3//10 + 1//10 0 3//10 2//10 1//10 5//10 ]), upper = sparse(N[ - 0 7//10 3//10 + 0 7//10 3//10 6//10 5//10 4//10 7//10 3//10 5//10 ]), @@ -20,6 +20,40 @@ using Random: MersenneTwister V = N[1, 2, 3] + @testset "vertices" begin + verts = IntervalMDP.vertices(ambiguity_sets[1]) + @test length(verts) <= 2 # = number of permutations of 2 elements + + expected_verts = N[ + 0 6//10 4//10 + 0 3//10 7//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + + verts = IntervalMDP.vertices(ambiguity_sets[2]) + @test length(verts) <= 6 # = number of permutations of 3 elements + + expected_verts = N[ # duplicates due to budget < gap for all elements + 7//10 2//10 1//10 + 7//10 0 3//10 + 5//10 4//10 1//10 + 7//10 0//10 3//10 + 5//10 2//10 3//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + + verts = IntervalMDP.vertices(ambiguity_sets[3]) + @test length(verts) <= 6 # = number of permutations of 3 elements + + expected_verts = N[ # Only one vertex since sum(lower) = 1 + 2//10 3//10 5//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + end + @testset "maximization" begin Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent @@ -61,6 +95,45 @@ using 
Random: MersenneTwister upper_bound = true, ) @test Vres ≈ Vexpected + + ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = true, + ) + @test Vres ≈ Vexpected end @testset "minimization" begin @@ -104,6 +177,45 @@ using Random: MersenneTwister upper_bound = false, ) @test Vres ≈ Vexpected + + ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = zeros(N, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected + + ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) + strategy_cache = IntervalMDP.construct_strategy_cache(imc) + Vres = similar(Vres) + IntervalMDP.bellman!( + ws, + strategy_cache, + Vres, + V, + imc; + upper_bound = false, + ) + @test Vres ≈ Vexpected end end @@ -112,7 +224,6 @@ using Random: MersenneTwister action_indices = (1,) state_vars = (2, 3) action_vars = (1,) - jₐ = CartesianIndex(1) marginal1 = Marginal(IntervalAmbiguitySets(; lower = 
sparse(N[ @@ -144,19 +255,25 @@ using Random: MersenneTwister 3 13 18 12 16 8 ] - eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) #### Maximization @testset "maximization" begin - V_vertex = [ - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + ) + + @test V_vertex ≈ N[ + 1076//75 4279//300 1081//75 + 2821//225 4123//300 121//9 + ] ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -247,15 +364,22 @@ using Random: MersenneTwister #### Minimization @testset "minimization" begin - V_vertex = [ - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + ) + + @test V_vertex ≈ N[ + 412//45 41//5 488//45 + 1033//100 543//50 4253//450 + ] ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -345,8 +469,6 @@ using Random: MersenneTwister end end - - @testset "bellman 2d partial dependence" begin state_vars = (2, 3) action_vars = (1, 2) @@ -381,21 +503,21 @@ using Random: MersenneTwister 3 13 18 12 16 8 ] - 
eval_vertices(p, q) = sum(V[I] * p[I[1]] * q[I[2]] for I in CartesianIndices(state_vars)) #### Maximization @testset "max/max" begin - V_vertex = [ - maximum( - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + maximize = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -491,17 +613,18 @@ using Random: MersenneTwister end @testset "min/max" begin - V_vertex = [ - minimum( - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + maximize = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -598,17 +721,18 @@ using Random: MersenneTwister #### Minimization @testset "min/min" begin - V_vertex = [ - minimum( - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in 
CartesianIndices(state_vars) - ] # The (inner) minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + maximize = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -704,17 +828,18 @@ using Random: MersenneTwister end @testset "max/min" begin - V_vertex = [ - maximum( - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]) - ) - ) for jₐ in CartesianIndices(action_vars) - ) for jₛ in CartesianIndices(state_vars) - ] # The (inner) minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 2, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + maximize = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -815,7 +940,6 @@ using Random: MersenneTwister action_indices = (1,) state_vars = (3, 3, 3) action_vars = (1,) - jₐ = CartesianIndex(1) marginal1 = Marginal(IntervalAmbiguitySets(; lower = sparse(N[ @@ -888,20 +1012,20 @@ using Random: MersenneTwister 2, ] V = reshape(V, 3, 3, 3) - eval_vertices(p, q, r) = sum(V[I] * p[I[1]] * q[I[2]] * r[I[3]] for I in CartesianIndices(state_vars)) #### Maximization @testset "maximization" begin - V_vertex = [ - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The 
maximum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -992,16 +1116,17 @@ using Random: MersenneTwister #### Minimization @testset "minimization" begin - V_vertex = [ - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1096,7 +1221,6 @@ using Random: MersenneTwister action_indices = (1,) state_vars = (3, 3, 3) action_vars = (1,) - jₐ = CartesianIndex(1) marginal1 = Marginal(IntervalAmbiguitySets(; lower = sparse(N[ @@ -1169,20 +1293,20 @@ using Random: MersenneTwister 2, ] V = reshape(V, 3, 3, 3) - eval_vertices(p, q, r) = sum(V[I] * p[I[1]] * q[I[2]] * r[I[3]] for I in CartesianIndices(state_vars)) #### Maximization @testset "maximization" begin - V_vertex = [ - maximum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The maximum will always be a vertex + ws = 
IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = true, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1273,16 +1397,17 @@ using Random: MersenneTwister #### Minimization @testset "minimization" begin - V_vertex = [ - minimum( - splat(eval_vertices), - Iterators.product( - IntervalMDP.vertex_generator(marginal1[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal2[jₐ, jₛ]), - IntervalMDP.vertex_generator(marginal3[jₐ, jₛ]) - ) - ) for jₛ in CartesianIndices(state_vars) - ] # The minimum will always be a vertex + ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) + strategy_cache = IntervalMDP.construct_strategy_cache(mdp) + V_vertex = zeros(N, 3, 3, 3) + IntervalMDP.bellman!( + ws, + strategy_cache, + V_vertex, + V, + mdp; + upper_bound = false, + ) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1372,317 +1497,319 @@ using Random: MersenneTwister end end - @testset "implicit sink state" begin - @testset "first dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (2, 3, 3) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 - 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 - 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 - ]), - upper = sparse(N[ - 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 
17//30 13//30 0 7//15 13//30 0 - 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 - 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 - 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 - 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 - ]), - upper = sparse(N[ - 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 - 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 - 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 - 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 - 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 - ]), - upper = sparse(N[ - 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 - 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 - 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 
3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 
3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end - - @testset "second dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (3, 2, 3) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 
19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 - ]) - ), 
state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 
1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end - - @testset "last dimension" begin - state_indices = (1, 2, 3) - action_indices = (1,) - state_vars = (3, 3, 3) - source_dims = (3, 3, 2) - action_vars = (1,) - - # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 - 11//30 1//3 2//5 8//15 7//15 
3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - mdp = 
FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) - - # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 
1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) - - prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) - spec = Specification(prop, Pessimistic, Maximize) - prob = VerificationProblem(mdp, spec) - implicit_prob = VerificationProblem(implicit_mdp, spec) - - V, k, res = solve(prob) - V_implicit, k_implicit, res_implicit = solve(implicit_prob) - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + @testset for alg in [RobustValueIteration(LPMcCormickRelaxation()), RobustValueIteration(OMaximization()), RobustValueIteration(VertexEnumeration())] + @testset "implicit sink state" begin + @testset "first dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (2, 3, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 + ]), + upper = sparse(N[ + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 
2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 + ]), + upper = sparse(N[ + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ]), + upper = sparse(N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 
13//30 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 
1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob, alg) + V_implicit, k_implicit, res_implicit = solve(implicit_prob, alg) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + @testset "second dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 2, 3) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 
0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = 
FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 
1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob, alg) + V_implicit, k_implicit, res_implicit = solve(implicit_prob, alg) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end + + @testset "last dimension" begin + state_indices = (1, 2, 3) + action_indices = (1,) + state_vars = (3, 3, 3) + source_dims = (3, 3, 2) + action_vars = (1,) + + # Explicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 
17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ]), + upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ]) + ), state_indices, action_indices, state_vars, action_vars) + + mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, 
marginal3)) + + # Implicit + marginal1 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ]), + upper = sparse(N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal2 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ]), + upper = sparse(N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + marginal3 = Marginal(IntervalAmbiguitySets(; + lower = sparse(N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ]), + 
upper = sparse(N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ]) + ), state_indices, action_indices, source_dims, action_vars) + + implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + + prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) + spec = Specification(prop, Pessimistic, Maximize) + prob = VerificationProblem(mdp, spec) + implicit_prob = VerificationProblem(implicit_mdp, spec) + + V, k, res = solve(prob, alg) + V_implicit, k_implicit, res_implicit = solve(implicit_prob, alg) + + @test V ≈ V_implicit + @test k == k_implicit + @test res ≈ res_implicit + end end end end \ No newline at end of file From 6ed1d408ac83ebae288d6524e5cc57cfadb2c2b8 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 17:20:56 +0200 Subject: [PATCH 24/71] Begin updating the documentation --- docs/make.jl | 3 +- docs/src/algorithms.md | 3 ++ docs/src/index.md | 46 +++++++++++++++++++++---------- docs/src/{theory.md => models.md} | 26 +---------------- docs/src/specifications.md | 25 +++++++++++++++++ 5 files changed, 63 insertions(+), 40 deletions(-) rename docs/src/{theory.md => models.md} (73%) create mode 100644 docs/src/specifications.md diff --git a/docs/make.jl b/docs/make.jl index 471ff931..09928d7f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -18,7 +18,8 @@ makedocs(; "Home" => "index.md", "Usage" => "usage.md", "Data formats" => "data.md", - "Theory" => "theory.md", + "Models" => "models.md", + "Specifications" => "specifications.md", "Algorithms" => "algorithms.md", "Reference" => Any[ "Systems" => "reference/systems.md", diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md 
index 9301d4dc..3663bb75 100644 --- a/docs/src/algorithms.md +++ b/docs/src/algorithms.md @@ -1,5 +1,8 @@ # Algorithms +!!! todo + Write about floating point precision and rational types. + To simplify the dicussion on the algorithmic choices, we will assume that the goal is to compute the maximizing pessimistic probability of reaching a set of states ``G``, that is, ```math diff --git a/docs/src/index.md b/docs/src/index.md index eafffb67..49724955 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,25 +4,43 @@ CurrentModule = IntervalMDP # IntervalMDP [IntervalMDP.jl](https://github.com/zinoex/IntervalMDP.jl) is a [Julia](https://julialang.org/) package for modeling -and certifying Interval Markov Decision Processes (IMDPs) via Value Iteration. +and verifying properties of various subclasses of factored Robust Markov Decision Processes (fRMDPs), in particular +Interval Markov Decision Processes (IMDPs) and factored IMDPs (fIMDPs) via Value Iteration. + +RMDPs are an extension of Markov Decision Processes (MDPs) that account for uncertainty in the transition +probabilities, and the factored variant introduces state and action variables such that the transition model +is a product of the transition models of the individual variables, allowing for more compact representations +and efficient algorithms. This package focuses on different subclasses of fRMDPs for which value iteration +can be performed efficiently including Interval Markov Chains (IMCs), IMDPs, orthogonally-decoupled IMDPs +(odIMDPs), and fIMDPs. See [Theory](@ref) for more information on these models. + +The aim of this package is to provide a user-friendly interface to solve verification and control synthesis +problems for fRMDPs with great efficiency, which includes methods for accelerating the computation using +CUDA hardware, pre-allocation, and other optimization techniques. 
See [Algorithms](@ref) for choices of the +algorithmic implementation of the Bellman operator; the package aims to provide a sensible default choice +of algorithms, but also allows the user to customize the algorithms to their needs. -IMDPs are a generalization of Markov Decision Processes (MDPs) where the transition probabilities -are represented by intervals instead of point values, to model uncertainty. IMDPs are also frequently -chosen as the model for abstracting the dynamics of a stochastic system, as one may compute upper -and lower bounds on transitioning from one region to another. +!!! info + For some subclasses of fRMDPs, the Bellman operator cannot be computed exactly, and thus, the provided + Bellman operators are sound approximations. See [Algorithms](@ref) for more information. -The aim of this package is to provide a user-friendly interface to solve value iteration for IMDPs -with great efficiency. Furthermore, it provides methods for accelerating the computation of the -certificate using CUDA hardware. See [Algorithms](@ref) for algorithmic advances that this package -introduces for enabling better use of the available hardware and higher performance. +The verification and control synthesis problems supported by this package include minimizing/maximizing +pessimistic/optimistic specifications over properties such as reachability, reach-avoid, safety, (discounted) +reward, and expected hitting times, and over finite and infinite horizons. For more complex properties, +the package supports Deterministic Finite Automata (DFA), with lazy product construction and efficient, +cache-friendly algorithms. See [Specifications](@ref) for more information on the supported specifications. -In addition, the package supports two new subclasses of robust MDPs, namely Orthogonally Decoupled IMDPs (OD-IMDPs), or just Orthogonal IMDPs, and mixtures of Orthogonal IMDPs. 
These models are designed to be more memory-efficient and computationally efficient than the general IMDP model and in many cases have smaller ambiguity sets, while still being able to represent a wide range of uncertainty. See [Theory](@ref) for more information on these models. +!!! info + We use the nomenclature "property" to refer to goal, which is both initializing the value function and + modifying it after every Bellman iteration, and "specification" to refer to whether to minimize or + maximize either the lower bound (pessimistic) or the upper bound (optimistic) of the value function. #### Features -- O-maximization and value iteration over IMDPs, OD-IMDPs and mixtures of OD-IMDPs -- Dense and sparse matrix support -- Parametric probability types for customizable precision -- Multithreaded CPU and CUDA-accelerated value iteration +- Value iteration over IMCs, IMDPs, odIMDPs, and fIMDPs. +- Multithreaded CPU and CUDA-accelerated value iteration. +- Dense and sparse matrix support. +- Parametric probability types (`Float64`, `Float32`, `Rational{BigInt}`) for customizable precision. Note that + `Rational{BigInt}` is not supported for CUDA acceleration. - Data loading and writing in formats by various tools (PRISM, bmdp-tool, IntervalMDP.jl) !!! info diff --git a/docs/src/theory.md b/docs/src/models.md similarity index 73% rename from docs/src/theory.md rename to docs/src/models.md index 8ab1484b..9d93e465 100644 --- a/docs/src/theory.md +++ b/docs/src/models.md @@ -1,4 +1,4 @@ -# Theory +# Models Notation: A probability distribution ``\gamma`` over a finite set ``S`` is a function ``\gamma : S \to [0, 1]`` satisfying ``\sum_{s \in S} \gamma(s) = 1``. We denote by ``\mathcal{D}(S)`` the set of all probability distributions over ``S``. 
For ``\underline{p}, \overline{p} : S \to [0, 1]`` such that ``\underline{p}(s) \leq \overline{p}(s)`` for each ``s \in S`` and ``\sum_{s \in S} \underline{p}(s) \leq 1 \leq \sum_{s \in S} \overline{p}(s)``, an interval ambiguity set ``\Gamma \subset \mathcal{D}(S)`` is the set of distributions such that ```math @@ -46,27 +46,3 @@ Formally, a mixture of OD-IMDPs ``M`` with ``K`` OD-IMDPs and ``n`` marginals is - ``\Gamma^\alpha = \{\Gamma^\alpha_{s,a}\}_{s \in S, a \in A}`` is a set of interval ambiguity sets for the weights of the mixture, i.e. over ``\{1, \ldots, K\}``. A feasible distribution for a mixture of OD-IMDPs is ``\sum_{r \in K} \alpha_{s,a}(r) \prod_{i = 1}^n \gamma_{r,s,a}`` where ``\alpha_{s,a} \in \Gamma^\alpha_{s,a}`` and ``\gamma_{r,s,a} \in \Gamma_{r,s,a}`` for each source-action pair ``(s, a)``. See [3] for more details on mixtures of OD-IMDPs. - -### Reachability -In this formal framework, we can describe computing reachability given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` as the following objective - -```math -{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{P}_{\pi,\eta }\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right], -``` - -where ``\mathop{opt}^{\pi},\mathop{opt}^{\eta} \in \{\min, \max\}`` and ``\mathbb{P}_{\pi,\eta }`` is the probability of the Markov chain induced by strategy ``\pi`` and adversary ``\eta``. -When ``\mathop{opt}^{\eta} = \min``, the solution is called optimal _pessimistic_ probability (or reward), and conversely is called optimal _optimistic_ probability (or reward) when ``\mathop{opt}^{\eta} = \max``. -The choice of the min/max for the action and pessimistic/optimistic probability depends on the application. - -### Discounted reward -Discounted reward is similar to reachability but instead of a target set, we have a reward function ``r: S \to \mathbb{R}`` and a discount factor ``\gamma \in (0, 1)``. 
The objective is then - -```math -{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{E}_{\pi,\eta }\left[\sum_{k=0}^{K} \gamma^k r(\omega(k)) \right]. -``` - -[1] Givan, Robert, Sonia Leach, and Thomas Dean. "Bounded-parameter Markov decision processes." Artificial Intelligence 122.1-2 (2000): 71-109. - -[2] Suilen, M., Badings, T., Bovy, E. M., Parker, D., & Jansen, N. (2024). Robust Markov Decision Processes: A Place Where AI and Formal Methods Meet. In Principles of Verification: Cycling the Probabilistic Landscape: Essays Dedicated to Joost-Pieter Katoen on the Occasion of His 60th Birthday, Part III (pp. 126-154). Cham: Springer Nature Switzerland. - -[3] Mathiesen, F. B., Haesaert, S., & Laurenti, L. (2024). Scalable control synthesis for stochastic systems via structural IMDP abstractions. arXiv preprint arXiv:2411.11803. \ No newline at end of file diff --git a/docs/src/specifications.md b/docs/src/specifications.md new file mode 100644 index 00000000..221715a9 --- /dev/null +++ b/docs/src/specifications.md @@ -0,0 +1,25 @@ +# Specifications + +## Reachability +In this formal framework, we can describe computing reachability given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` as the following objective + +```math +{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{P}_{\pi,\eta }\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right], +``` + +where ``\mathop{opt}^{\pi},\mathop{opt}^{\eta} \in \{\min, \max\}`` and ``\mathbb{P}_{\pi,\eta }`` is the probability of the Markov chain induced by strategy ``\pi`` and adversary ``\eta``. +When ``\mathop{opt}^{\eta} = \min``, the solution is called optimal _pessimistic_ probability (or reward), and conversely is called optimal _optimistic_ probability (or reward) when ``\mathop{opt}^{\eta} = \max``. +The choice of the min/max for the action and pessimistic/optimistic probability depends on the application. 
+ +## Discounted reward +Discounted reward is similar to reachability but instead of a target set, we have a reward function ``r: S \to \mathbb{R}`` and a discount factor ``\gamma \in (0, 1)``. The objective is then + +```math +{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{E}_{\pi,\eta }\left[\sum_{k=0}^{K} \gamma^k r(\omega(k)) \right]. +``` + +[1] Givan, Robert, Sonia Leach, and Thomas Dean. "Bounded-parameter Markov decision processes." Artificial Intelligence 122.1-2 (2000): 71-109. + +[2] Suilen, M., Badings, T., Bovy, E. M., Parker, D., & Jansen, N. (2024). Robust Markov Decision Processes: A Place Where AI and Formal Methods Meet. In Principles of Verification: Cycling the Probabilistic Landscape: Essays Dedicated to Joost-Pieter Katoen on the Occasion of His 60th Birthday, Part III (pp. 126-154). Cham: Springer Nature Switzerland. + +[3] Mathiesen, F. B., Haesaert, S., & Laurenti, L. (2024). Scalable control synthesis for stochastic systems via structural IMDP abstractions. arXiv preprint arXiv:2411.11803. 
\ No newline at end of file From e4cf63f13c1a95e44f9f7c73bdf616f701b8b457 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 22:47:45 +0200 Subject: [PATCH 25/71] Start adding documentation to all these changes --- README.md | 33 ++++--- docs/Project.toml | 1 + docs/make.jl | 15 +++- docs/src/algorithms.md | 2 +- docs/src/index.md | 22 ++--- docs/src/models.md | 71 ++++++++++----- docs/src/reference/systems.md | 82 ++++++++--------- docs/src/references.md | 3 + docs/src/refs.bib | 87 +++++++++++++++++++ docs/src/usage.md | 26 ++++-- .../FactoredRobustMarkovDecisionProcess.jl | 32 ++++++- src/models/IntervalMarkovChain.jl | 9 ++ src/models/IntervalMarkovDecisionProcess.jl | 21 +++++ src/models/ProductProcess.jl | 4 +- src/probabilities/IntervalAmbiguitySets.jl | 37 ++++++-- src/probabilities/Marginal.jl | 60 ++++++++++++- src/probabilities/probabilities.jl | 2 +- 17 files changed, 394 insertions(+), 113 deletions(-) create mode 100644 docs/src/references.md create mode 100644 docs/src/refs.bib diff --git a/README.md b/README.md index 87d2aec6..b00c2fd6 100644 --- a/README.md +++ b/README.md @@ -4,23 +4,28 @@ [![Build Status](https://github.com/zinoex/IntervalMDP.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/zinoex/IntervalMDP.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Codecov](https://codecov.io/gh/Zinoex/IntervalMDP.jl/graph/badge.svg?token=K62S0148BK)](https://codecov.io/gh/Zinoex/IntervalMDP.jl) -IntervalMDP.jl is a Julia package for modeling and certifying Interval Markov Decision Processes (IMDPs) via Value Iteration. +IntervalMDP.jl is a Julia package for modeling +and verifying properties of various subclasses of factored Robust Markov Decision Processes (fRMDPs), in particular +Interval Markov Decision Processes (IMDPs) and factored IMDPs (fIMDPs) via Value Iteration. 
-IMDPs are a generalization of Markov Decision Processes (MDPs) where the transition probabilities -are represented by intervals instead of point values, to model uncertainty. IMDPs are also frequently -chosen as the model for abstracting the dynamics of a stochastic system, as one may compute upper -and lower bounds on transitioning from one region to another. +RMDPs are an extension of Markov Decision Processes (MDPs) that account for uncertainty in the transition +probabilities, and the factored variant introduces state and action variables such that the transition model +is a product of the transition models of the individual variables, allowing for more compact representations +and efficient algorithms. This package focuses on different subclasses of fRMDPs for which value iteration +can be performed efficiently including Interval Markov Chains (IMCs), IMDPs, orthogonally-decoupled IMDPs +(odIMDPs), and fIMDPs. -The aim of this package is to provide a user-friendly interface to solve value iteration for IMDPs -with great efficiency. Furthermore, it provides methods for accelerating the computation of the -certificate using CUDA hardware. +The aim of this package is to provide a user-friendly interface to solve verification and control synthesis +problems for fRMDPs with great efficiency, which includes methods for accelerating the computation using +CUDA hardware, pre-allocation, and other optimization techniques. ## Features -- Value iteration (Bellman operator via O-maximization) -- Dense and sparse matrix support -- Parametric probability and value types for customizable precision including rationals and floating-point numbers -- Multithreaded CPU and CUDA-accelerated value iteration -- Data loading and writing in formats by various tools (PRISM, bmdp-tool, IMDP.jl) +- Value iteration over IMCs, IMDPs, odIMDPs, and fIMDPs. +- Multithreaded CPU and CUDA-accelerated value iteration. +- Dense and sparse matrix support. 
+- Parametric probability types (`Float64`, `Float32`, `Rational{BigInt}`) for customizable precision. Note that + `Rational{BigInt}` is not supported for CUDA acceleration. +- Data loading and writing in formats by various tools (PRISM, bmdp-tool, IntervalMDP.jl) ## Installation @@ -44,7 +49,7 @@ The goal is to compute the maximum pessimistic probability of reaching state 3 w using IntervalMDP # IMC -prob = IntervalProbabilities(; +prob = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.0 0.1 0.3 0.0 diff --git a/docs/Project.toml b/docs/Project.toml index b2a4a0d8..ec0de06c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,6 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" IntervalMDP = "051c988a-e73c-45a4-90ec-875cac0402c7" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/docs/make.jl b/docs/make.jl index 09928d7f..6d19c183 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,9 +1,14 @@ using IntervalMDP, IntervalMDP.Data -using Documenter +using Documenter, DocumenterCitations push!(LOAD_PATH, "../src/") DocMeta.setdocmeta!(IntervalMDP, :DocTestSetup, :(using IntervalMDP); recursive = true) +bib = CitationBibliography( + joinpath(@__DIR__, "src", "refs.bib"); + style=:numeric +) + makedocs(; modules = [IntervalMDP, IntervalMDP.Data], authors = "Frederik Baymler Mathiesen and contributors", @@ -17,20 +22,22 @@ makedocs(; pages = [ "Home" => "index.md", "Usage" => "usage.md", - "Data formats" => "data.md", "Models" => "models.md", "Specifications" => "specifications.md", "Algorithms" => "algorithms.md", - "Reference" => Any[ + "API reference" => Any[ "Systems" => "reference/systems.md", "Specifications" => "reference/specifications.md", "Solve Interface" => "reference/solve.md", "Data Storage" => "reference/data.md", + "Index" => "api.md", ], - "Index" => "api.md", + "Data formats" => "data.md", + "References" => 
"references.md", ], doctest = false, checkdocs = :exports, + plugins = [bib], ) deploydocs(; repo = "github.com/Zinoex/IntervalMDP.jl", devbranch = "main") diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md index 3663bb75..50512a89 100644 --- a/docs/src/algorithms.md +++ b/docs/src/algorithms.md @@ -9,7 +9,7 @@ \max_{\pi} \; \min_{\eta} \; \mathbb{P}_{\pi,\eta }\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right]. ``` -See [Theory](@ref) for more details on the theory behind IMDPs including strategies and adversaries; in this case the maximization and minimization operators respectively. The algorithms are easily adapted to other specifications, such as minimizing optimistic probability, which is useful for safety, or maximizing pessimitic discounted reward. Assume furthermore that the transition probabilities are represented as a sparse matrix. +See [Models](@ref) for more details on the formal definition of fRMDPs, strategies, and adversaries; in this case the maximization and minimization operators respectively. The algorithms are easily adapted to other specifications, such as minimizing optimistic probability, which is useful for safety, or maximizing pessimistic discounted reward. Assume furthermore that the transition probabilities are represented as a sparse matrix. 
## Solving reachability as value iteration diff --git a/docs/src/index.md b/docs/src/index.md index 49724955..d594e668 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,7 +12,7 @@ probabilities, and the factored variant introduces state and action variables su is a product of the transition models of the individual variables, allowing for more compact representations and efficient algorithms. This package focuses on different subclasses of fRMDPs for which value iteration can be performed efficiently including Interval Markov Chains (IMCs), IMDPs, orthogonally-decoupled IMDPs -(odIMDPs), and fIMDPs. See [Theory](@ref) for more information on these models. +(odIMDPs), and fIMDPs. See [Models](@ref) for more information on these models. The aim of this package is to provide a user-friendly interface to solve verification and control synthesis problems for fRMDPs with great efficiency, which includes methods for accelerating the computation using @@ -31,21 +31,21 @@ the package supports Deterministic Finite Automata (DFA), with lazy product cons cache-friendly algorithms. See [Specifications](@ref) for more information on the supported specifications. !!! info - We use the nomenclature "property" to refer to goal, which is both initializing the value function and - modifying it after every Bellman iteration, and "specification" to refer to whether to minimize or - maximize either the lower bound (pessimistic) or the upper bound (optimistic) of the value function. + We use the nomenclature "property" to refer to goal, which defines both how the value function + is initialized and how it is updated after every Bellman iteration, and "specification" refers to whether + to minimize or maximize either the lower bound (pessimistic) or the upper bound (optimistic) of the value function. #### Features - Value iteration over IMCs, IMDPs, odIMDPs, and fIMDPs. 
+- Plenty of built-in specifications including reachability, safety, reach-avoid, discounted reward, and expected hitting times. +- Support for complex specifications via Deterministic Finite Automata (DFA) with lazy product construction. - Multithreaded CPU and CUDA-accelerated value iteration. - Dense and sparse matrix support. - Parametric probability types (`Float64`, `Float32`, `Rational{BigInt}`) for customizable precision. Note that `Rational{BigInt}` is not supported for CUDA acceleration. -- Data loading and writing in formats by various tools (PRISM, bmdp-tool, IntervalMDP.jl) - -!!! info - Until now, all state-of-the-art tools for IMDPs have been standalone programs. - We choose to develop this as a a package to enable better integration with other tools and libraries and improving the extensibility. +- Data loading and writing in formats by various tools (PRISM, bmdp-tool, IntervalMDP.jl). +- Extensible and modular design to allow for custom models, distributed storage and computation, novel specifications, + and additional Bellman operator and model checking algorithms, and integration with other tools and libraries[^1]. ## Installation @@ -60,4 +60,6 @@ julia> import Pkg; Pkg.add("IntervalMDP") If you want to use the CUDA extension, you also need to install `CUDA.jl`: ```julia julia> import Pkg; Pkg.add("CUDA") -``` \ No newline at end of file +``` + +[^1]: State-of-the-art tools for IMDPs are all standalone programs. We choose to develop this as a package to enable better integration with other tools and to improve extensibility. \ No newline at end of file diff --git a/docs/src/models.md b/docs/src/models.md index 9d93e465..47997332 100644 --- a/docs/src/theory.md +++ b/docs/src/models.md @@ -1,4 +1,4 @@ -# Theory +# Models -Notation: A probability distribution ``\gamma`` over a finite set ``S`` is a function ``\gamma : S \to [0, 1]`` satisfying ``\sum_{s \in S} \gamma(s) = 1``. 
We denote by ``\mathcal{D}(S)`` the set of all probability distributions over ``S``. -For ``\underline{p}, \overline{p} : S \to [0, 1]`` such that ``\underline{p}(s) \leq \overline{p}(s)`` for each ``s \in S`` and ``\sum_{s \in S} \underline{p}(s) \leq 1 \leq \sum_{s \in S} \overline{p}(s)``, an interval ambiguity set ``\Gamma \subset \mathcal{D}(S)`` is the set of distributions such that +#### Mathematical Notation +We denote the natural numbers by ``\mathbb{N}`` and ``\mathbb{N}_0 = \mathbb{N} \cup \{0\}``. A probability distribution ``\gamma`` over a finite set ``S`` is a function ``\gamma : S \to [0, 1]`` satisfying ``\sum_{s \in S} \gamma(s) = 1``. We denote by ``\mathcal{D}(S)`` the set of all probability distributions over ``S``. +For ``\underline{\gamma}, \overline{\gamma} : S \to [0, 1]`` such that ``\underline{\gamma}(s) \leq \overline{\gamma}(s)`` for each ``s \in S`` and ``\sum_{s \in S} \underline{\gamma}(s) \leq 1 \leq \sum_{s \in S} \overline{\gamma}(s)``, an interval ambiguity set ``\Gamma \subset \mathcal{D}(S)`` is the set of distributions such that ```math - \Gamma = \{ \gamma \in \mathcal{D}(S) \,:\, \underline{p}(s) \leq \gamma(s) \leq \overline{p}(s) \text{ for each } s\in S \}. + \Gamma = \{ \gamma \in \mathcal{D}(S) \,:\, \underline{\gamma}(s) \leq \gamma(s) \leq \overline{\gamma}(s) \text{ for each } s\in S \}. ``` -``\underline{p}, \overline{p}`` are referred to as the interval bounds of the interval ambiguity set. -For ``n`` finite sets ``S_1, \ldots, S_n`` we denote by ``S_1 \times \cdots \times S_n`` their Cartesian product. Given ``S=S_1 \times \cdots \times S_n`` and ``n`` ambiguity sets ``\Gamma_i \in \mathcal{D}(S_i)``, ``i = 1, \ldots, n``, the product ambiguity set ``\Gamma \subseteq \mathcal{D}(S)`` is defined as: +``\underline{\gamma}, \overline{\gamma}`` are referred to as the interval bounds of the interval ambiguity set. 
+For ``n`` finite sets ``S_1, \ldots, S_n`` we denote by ``S_1 \times \cdots \times S_n`` their Cartesian product. Given ``S = S_1 \times \cdots \times S_n`` and ``n`` ambiguity sets ``\Gamma_i \in \mathcal{D}(S_i)``, ``i = 1, \ldots, n``, the product ambiguity set ``\Gamma \subseteq \mathcal{D}(S)`` is defined as: ```math \Gamma = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(s) = \prod_{i=1}^n \gamma^i(s^i), \, \gamma^i \in \Gamma_i \right\} ``` -where ``s = (s_1, \ldots, s_n)\in S``. We will denote the product ambiguity set as ``\Gamma = \bigotimes_{i=1}^n \Gamma_i``. Each ``\Gamma_i`` is called a marginal or component ambiguity set. +where ``s = (s_1, \ldots, s_n) \in S``. We will denote the product ambiguity set as ``\Gamma = \bigotimes_{i=1}^n \Gamma_i``. Each ``\Gamma_i`` is called a marginal or component ambiguity set. + +## Factored RMDPs +Factored Robust Markov Decision Processes (fRMDPs) [schnitzer2025efficient, delgado2011efficient](@cite) are an extension of Robust Markov Decision Processes (RMDPs) [nilim2005robust, wiesemann2013robust, suilen2024robust](@cite) that incorporate a factored representation of the state and action spaces, i.e. with state and action variables. This allows for a more compact representation of the transition model and flexibility in modeling complex systems. First, we define here fRMDPs, and then in the subsequent sections, we define various special subclasses of fRMDPs, including how they relate to each other and to fRMDPs. 
+ +Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \mathcal{G}, \Gamma)``, where + +- ``S = S_1 \times \cdots \times S_n`` is a finite set of joint states with ``S_i`` being a finite set of states for the ``i``-th state variable, +- ``S_0 \subseteq S`` is a set of initial states, +- ``A = A_1 \times \cdots \times A_m`` is a finite set of joint actions with ``A_j`` being a finite set of actions for the ``j``-th action variable, +- ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \mathcal{V}_{ind} \cup \mathcal{V}_{cond} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V}_{ind} \times \mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, +- ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is a product of ambiguity sets ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` along each marginal ``i`` conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``, i.e. +```math + \Gamma_{s,a} = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(s') = \prod_{i=1}^n \gamma^i(s'_i | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}), \, \gamma^i(\cdot | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}) \in \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i)} \right\}. +``` + +A path of an fRMDP is a sequence of states and actions ``\omega = (s[0], a[0]), (s[1], a[1]), \dots`` where ``(s[k], a[k]) \in S \times A`` for all ``k \in \mathbb{N}_0``. 
We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. +A _strategy_ or _policy_ for an fRMDP is a function ``\pi : \Omega_{fin} \to A`` that assigns an action, given a (finite) path called the history. _Time-dependent_ Markov strategies are functions from state and time step to an action, i.e. ``\pi : S \times \mathbb{N}_0 \to A``. This can equivalently be described as a sequence of functions indexed by time ``\boldsymbol{\pi} = (\pi[0], \pi[1], \ldots)``. If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. The focus of this package is on uncertainties where the choice of the adversary is resolved at every time step, called dynamic uncertainty, and where the adversary has access to both the current state and action, called ``(s, a)``-rectangularity. We refer to [suilen2024robust](@cite) for further details on the distinction between static and dynamic uncertainties, types of rectangularity, and their implications. Given a strategy and an adversary, an fRMDP collapses to a finite (factored) Markov chain. + +## IMCs +Interval Markov Chains (IMCs) [delahaye2011decision](@cite) are a subclass of fRMDPs and a generalization of Markov Chains (MCs), where the transition probabilities are not known exactly, but they are constrained to be in some probability interval. +Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where + +- ``S`` is a finite set of states, +- ``S_0 \subseteq S`` is a set of initial states, +- ``\Gamma = \{\Gamma_{s}\}_{s\in S}`` is a set of ambiguity sets for source state ``s``, where each ``\Gamma_{s}`` is an interval ambiguity set over ``S``.
+ +An IMC is equivalent to an fRMDP where there is only one state variable, no action variables, and the ambiguity sets are interval ambiguity sets. The dependency graph is just two nodes ``S`` and ``S'`` with a single edge from the former to the latter. Paths and adversaries are defined similarly to fRMDPs. ## IMDPs -Interval Markov Decision Processes (IMDPs), also called bounded-parameter MDPs [1], are a generalization of MDPs, where the transition probabilities, given source state and action, are not known exactly, but they are constrained to be in some probability interval. -Formally, an IMDP ``M`` is a tuple ``M = (S, S_0`, A, \Gamma)``, where +Interval Markov Decision Processes (IMDPs) [givan2000bounded, lahijanian2015formal](@cite), also called bounded-parameter MDPs, are a subclass of fRMDPs and a generalization of MDPs, where the transition probabilities, given source state and action, are not known exactly, but they are constrained to be in some probability interval. IMDPs generalize IMCs by adding actions. +Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \Gamma)``, where - ``S`` is a finite set of states, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, - ```\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a}`` is an interval ambiguity set over ``S``. -A path of an IMDP is a sequence of states and actions ``\omega = (s_0,a_0),(s_1,a_1),\dots``, where ``(s_i,a_i)\in S \times A``. We denote by ``\omega(k) = s_k`` the state of the path at time ``k \in \mathbb{N}^0`` and by ``\Omega`` the set of all paths. -A _strategy_ or _policy_ for an IMDP is a function ``\pi`` that assigns an action to a given state of an IMDP. _Time-dependent_ strategies are functions from state and time step to an action, i.e. ``\pi: S\times \mathbb{N}^0 \to A``.
If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. Given a strategy and an adversary, an IMDP collapses to a finite Markov chain. +An IMDP is equivalent to an fRMDP where there is only one state variable, one action variable, and the ambiguity sets are interval ambiguity sets. The dependency graph is three nodes ``S``, ``A``, and ``S'`` with two edges ``S \rightarrow S'`` and ``A \rightarrow S'``. Paths and adversaries are defined similarly to fRMDPs. -## OD-IMDPs -Orthogonally Decoupled IMDPs (OD-IMDPs) are a subclass of robust MDPs that are designed to be more memory-efficient and computationally efficient than the general IMDP model. The states are structured into an orthogonal, or grid-based, decomposition, and and the transition probability ambiguity sets, for each source-action pair (note the ``(s, a)``-rectangularity [2]), as a product of interval ambiguity sets along each marginal. +## odIMDPs +Orthogonally-decoupled IMDPs (odIMDPs) [mathiesen2025scalable](@cite) are a subclass of fRMDPs designed to be more memory-efficient than IMDPs. The states are structured into an orthogonal, or grid-based, decomposition and the transition probability ambiguity sets, for each source-action pair, are represented as a product of interval ambiguity sets along each marginal.
-Formally, an OD-IMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \Gamma)``, where +Formally, an odIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \Gamma)``, where - ``S = S_1 \times \cdots \times S_n`` is a finite set of joint states with ``S_i`` being a finite set of states for the ``i``-th marginal, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, - ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{s,a}`` with ``\Gamma^i_{s,a}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``. -Paths, strategies, and adversaries are defined similarly to IMDPs. See [3] for more details on OD-IMDPs. +An odIMDP is equivalent to an fRMDP where the dependency graph is ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` with ``\mathcal{V} = \{S_1, \ldots, S_n, A\} \cup \{S'_1, \ldots, S'_n\}`` and ``\mathcal{E} = \{(S_i, S'_j) : i, j = 1, \ldots, n\} \cup \{(A, S'_j) : j = 1, \ldots, n\}``. In other words, each next-state variable ``S'_i`` depends on all state and action variables and the dependency graph is a complete bipartite graph. Paths, strategies, and adversaries are defined similarly to fRMDPs. -## Mixtures of OD-IMDPs -Mixtures of OD-IMDPs are included to address the issue the OD-IMDPs may not be able to represent all uncertainty in the transition probabilities. The mixture model is a convex combination of OD-IMDPs, where each OD-IMDP has its own set of ambiguity sets. Furthermore, the weights of the mixture are also interval-valued. +## fIMDPs +Factored IMDPs (fIMDPs) are a subclass of fRMDPs where each marginal ambiguity set is an interval ambiguity set, but where the dependency graph can be arbitrary.
+Formally, an fIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \mathcal{G}, \Gamma)``, where -Formally, a mixture of OD-IMDPs ``M`` with ``K`` OD-IMDPs and ``n`` marginals is a tuple ``M = (S, S_0, A, \Gamma, \Gamma^\alpha)``, where - ``S = S_1 \times \cdots \times S_n`` is a finite set of joint states with ``S_i`` being a finite set of states for the ``i``-th marginal, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, -- ``\Gamma = \{\Gamma_{r,s,a}\}_{r \in K, s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)`` and OD-IMDP ``R``, where each ``\Gamma_{r,s,a} = \bigotimes_{i=1}^n \Gamma^i_{r,s,a}`` with ``\Gamma^i_{r,s,a}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``. -- ``\Gamma^\alpha = \{\Gamma^\alpha_{s,a}\}_{s \in S, a \in A}`` is a set of interval ambiguity sets for the weights of the mixture, i.e. over ``\{1, \ldots, K\}``. +- ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V} \times \mathcal{V}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, +- ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` with ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``, conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``. 
-A feasible distribution for a mixture of OD-IMDPs is ``\sum_{r \in K} \alpha_{s,a}(r) \prod_{i = 1}^n \gamma_{r,s,a}`` where ``\alpha_{s,a} \in \Gamma^\alpha_{s,a}`` and ``\gamma_{r,s,a} \in \Gamma_{r,s,a}`` for each source-action pair ``(s, a)``. See [3] for more details on mixtures of OD-IMDPs. +## References +```@bibliography +Pages = ["models.md"] +Canonical = false +``` \ No newline at end of file diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 2f0e223b..58ec2fb9 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -2,19 +2,28 @@ ```@docs IntervalMarkovProcess -num_states(s::IntervalMarkovProcess) -initial_states(s::IntervalMarkovProcess) +num_states +num_actions +initial_states(mp::IntervalMarkovProcess) AllStates -transition_prob(mp::IntervalMarkovProcess) -IntervalMarkovDecisionProcess +``` + +## Factored RMDPs +```@docs +FactoredRobustMarkovDecisionProcess +state_variables(s::FactoredRobustMarkovDecisionProcess) +action_variables(s::FactoredRobustMarkovDecisionProcess) +marginals(s::FactoredRobustMarkovDecisionProcess) +``` + +## Convenience constructors for subclasses of fRMDPs +```@docs IntervalMarkovChain -stateptr(mdp::IntervalMarkovDecisionProcess) -OrthogonalIntervalMarkovDecisionProcess -OrthogonalIntervalMarkovChain -stateptr(mdp::OrthogonalIntervalMarkovDecisionProcess) -MixtureIntervalMarkovDecisionProcess -MixtureIntervalMarkovChain -stateptr(mdp::MixtureIntervalMarkovDecisionProcess) +IntervalMarkovDecisionProcess +``` + +## Deterministic Finite Automaton (DFA) +```@docs DFA num_states(dfa::DFA) num_labels(dfa::DFA) @@ -29,45 +38,28 @@ labelling_function(proc::ProductProcess) ``` ## Probability representation - -### Interval ambiguity sets ```@docs -IntervalProbabilities -lower(p::IntervalProbabilities) -lower(p::IntervalProbabilities, i, j) -upper(p::IntervalProbabilities) -upper(p::IntervalProbabilities, i, j) -gap(p::IntervalProbabilities) -gap(p::IntervalProbabilities, i, j) 
-num_source(p::IntervalProbabilities) -num_target(p::IntervalProbabilities) -axes_source(p::IntervalProbabilities) -``` +Marginal +ambiguity_sets(m::Marginal) +state_variables(m::Marginal) +action_variables(m::Marginal) +source_shape(m::Marginal) +action_shape(m::Marginal) +num_target(m::Marginal) +getindex(p::Marginal, action, source) -### Marginal interval ambiguity sets -```@docs -OrthogonalIntervalProbabilities -lower(p::OrthogonalIntervalProbabilities, l) -lower(p::OrthogonalIntervalProbabilities, l, i, j) -upper(p::OrthogonalIntervalProbabilities, l) -upper(p::OrthogonalIntervalProbabilities, l, i, j) -gap(p::OrthogonalIntervalProbabilities, l) -gap(p::OrthogonalIntervalProbabilities, l, i, j) -sum_lower(p::OrthogonalIntervalProbabilities, l) -sum_lower(p::OrthogonalIntervalProbabilities, l, j) -num_source(p::OrthogonalIntervalProbabilities) -num_target(p::OrthogonalIntervalProbabilities) -axes_source(p::OrthogonalIntervalProbabilities) +num_sets +support ``` -### Mixtures of marginal interval ambiguity sets +### Interval ambiguity sets ```@docs -MixtureIntervalProbabilities -num_source(p::MixtureIntervalProbabilities) -num_target(p::MixtureIntervalProbabilities) -axes_source(p::MixtureIntervalProbabilities) -mixture_probs -weighting_probs +IntervalAmbiguitySets +num_sets(p::IntervalAmbiguitySets) +num_target(p::IntervalAmbiguitySets) +lower +upper +gap ``` ### Labelling of IMDP states to Automaton alphabet diff --git a/docs/src/references.md b/docs/src/references.md new file mode 100644 index 00000000..df106f26 --- /dev/null +++ b/docs/src/references.md @@ -0,0 +1,3 @@ +# Bibliography +```@bibliography +``` \ No newline at end of file diff --git a/docs/src/refs.bib b/docs/src/refs.bib new file mode 100644 index 00000000..86c0b2d7 --- /dev/null +++ b/docs/src/refs.bib @@ -0,0 +1,87 @@ +@article{givan2000bounded, + title={Bounded-parameter Markov decision processes}, + author={Givan, Robert and Leach, Sonia and Dean, Thomas}, + journal={Artificial 
Intelligence}, + volume={122}, + number={1-2}, + pages={71--109}, + year={2000}, + publisher={Elsevier} +} + +@article{nilim2005robust, + title={Robust control of Markov decision processes with uncertain transition matrices}, + author={Nilim, Arnab and El Ghaoui, Laurent}, + journal={Operations Research}, + volume={53}, + number={5}, + pages={780--798}, + year={2005}, + publisher={INFORMS} +} + +@inproceedings{delahaye2011decision, + title={Decision problems for interval Markov chains}, + author={Delahaye, Beno{\^\i}t and Larsen, Kim G and Legay, Axel and Pedersen, Mikkel L and W{\k{a}}sowski, Andrzej}, + booktitle={International Conference on Language and Automata Theory and Applications}, + pages={274--285}, + year={2011}, + organization={Springer} +} + +@article{delgado2011efficient, + title={Efficient solutions to factored MDPs with imprecise transition probabilities}, + author={Delgado, Karina Valdivia and Sanner, Scott and De Barros, Leliane Nunes}, + journal={Artificial Intelligence}, + volume={175}, + number={9-10}, + pages={1498--1527}, + year={2011}, + publisher={Elsevier} +} + +@article{wiesemann2013robust, + title={Robust Markov decision processes}, + author={Wiesemann, Wolfram and Kuhn, Daniel and Rustem, Ber{\c{c}}}, + journal={Mathematics of Operations Research}, + volume={38}, + number={1}, + pages={153--183}, + year={2013}, + publisher={INFORMS} +} + +@article{lahijanian2015formal, + title={Formal verification and synthesis for discrete-time stochastic systems}, + author={Lahijanian, Morteza and Andersson, Sean B and Belta, Calin}, + journal={IEEE Transactions on Automatic Control}, + volume={60}, + number={8}, + pages={2031--2045}, + year={2015}, + publisher={IEEE} +} + +@incollection{suilen2024robust, + title={Robust markov decision processes: A place where AI and formal methods meet}, + author={Suilen, Marnix and Badings, Thom and Bovy, Eline M and Parker, David and Jansen, Nils}, + booktitle={Principles of Verification: Cycling the 
Probabilistic Landscape: Essays Dedicated to Joost-Pieter Katoen on the Occasion of His 60th Birthday, Part III}, + pages={126--154}, + year={2024}, + publisher={Springer} +} + +@inproceedings{mathiesen2025scalable, + title={Scalable control synthesis for stochastic systems via structural IMDP abstractions}, + author={Mathiesen, Frederik Baymler and Haesaert, Sofie and Laurenti, Luca}, + booktitle={Proceedings of the 28th ACM International Conference on Hybrid Systems: Computation and Control}, + pages={1--12}, + year={2025} +} + +@article{schnitzer2025efficient, + title={Efficient Solution and Learning of Robust Factored MDPs}, + author={Schnitzer, Yannik and Abate, Alessandro and Parker, David}, + journal={arXiv preprint arXiv:2508.00707}, + year={2025} +} diff --git a/docs/src/usage.md b/docs/src/usage.md index d8666a81..37255399 100644 --- a/docs/src/usage.md +++ b/docs/src/usage.md @@ -14,7 +14,7 @@ An example of how to construct either is the following: using IntervalMDP # IMC -prob = IntervalProbabilities(; +prob = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.0 0.1 0.3 0.0 @@ -31,7 +31,7 @@ initial_states = [1] # Initial states are optional mc = IntervalMarkovChain(prob, initial_states) # IMDP -prob1 = IntervalProbabilities(; +prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.1 0.3 @@ -44,7 +44,7 @@ prob1 = IntervalProbabilities(; ], ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = [ 0.1 0.2 0.2 0.3 @@ -58,8 +58,16 @@ prob2 = IntervalProbabilities(; ) prob3 = IntervalProbabilities(; - lower = [0.0; 0.0; 1.0], - upper = [0.0; 0.0; 1.0] + lower = [ + 0.0 0.0 + 0.0 0.0 + 0.1 0.1 + ], + upper = [ + 0.0 0.0 + 0.0 0.0 + 0.1 0.1 + ], ) transition_probs = [prob1, prob2, prob3] @@ -141,7 +149,7 @@ store the transition matrices in the [compressed sparse column (CSC)](https://en This is a format that is widely used in Julia and other languages, and is supported by many linear algebra operations. 
It consists of three arrays: `colptr`, `rowval` and `nzval`. The `colptr` array stores the indices of the first non-zero value in each column. The `rowval` array stores the row indices of the non-zero values, and the `nzval` array stores the non-zero values. -We choose this format, since source states are on the columns (see [`IntervalProbabilities`](@ref) for more information about the structure of the transition probability matrices). +We choose this format, since source states are on the columns (see [`IntervalAmbiguitySets`](@ref) for more information about the structure of the transition probability matrices). Thus the non-zero values for each source state is stored in sequentially in memory, enabling efficient memory access. To use `SparseMatrixCSC`, we need to load `SparseArrays`. Below is an example of how to construct an `IntervalMarkovChain` with sparse transition matrices. @@ -177,7 +185,7 @@ upper ``` ```julia -prob = IntervalProbabilities(; lower = lower, upper = upper) +prob = IntervalAmbiguitySets(; lower = lower, upper = upper) initial_state = 1 imc = IntervalMarkovChain(prob, initial_state) ``` @@ -215,7 +223,7 @@ Similar to `CUDA.jl`, we provide a `cu` function that transfers the model to the or transfer the transition matrices separately. 
```julia # Transfer entire model to GPU -prob = IntervalProbabilities(; +prob = IntervalAmbiguitySets(; lower = sparse_hcat( SparseVector(3, [2, 3], [0.1, 0.2]), SparseVector(3, [1, 2, 3], [0.5, 0.3, 0.1]), @@ -231,7 +239,7 @@ prob = IntervalProbabilities(; mc = IntervalMDP.cu(IntervalMarkovChain(prob, 1)) # Transfer transition matrices separately -prob = IntervalProbabilities(; +prob = IntervalAmbiguitySets(; lower = IntervalMDP.cu(sparse_hcat( SparseVector(3, [2, 3], [0.1, 0.2]), SparseVector(3, [1, 2, 3], [0.5, 0.3, 0.1]), diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index d03ff3c4..e0e979d3 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -1,4 +1,16 @@ +""" + FactoredRobustMarkovDecisionProcess +!!! todo + Add intuitive description and formal definition + +!!! todo + Add fields reference and relation to definition + +!!! todo + Add example + +""" struct FactoredRobustMarkovDecisionProcess{ N, M, @@ -130,12 +142,30 @@ function check_initial_states(state_vars, initial_states) end end +""" + state_variables(mdp::FactoredRMDP) + +Return a tuple with the number of states for each state variable in the fRMDP. +""" state_variables(mdp::FactoredRMDP) = mdp.state_vars state_variables(mdp::FactoredRMDP, r) = mdp.state_vars[r] + +""" + action_variables(mdp::FactoredRMDP) + +Return a tuple with the number of actions for each action variable in the fRMDP. +""" action_variables(mdp::FactoredRMDP) = mdp.action_vars + +""" + marginals(mdp::FactoredRMDP) + +Return the marginals of the fRMDP. 
+""" +marginals(mdp::FactoredRMDP) = mdp.transition + num_states(mdp::FactoredRMDP) = prod(state_variables(mdp)) num_actions(mdp::FactoredRMDP) = prod(action_variables(mdp)) -marginals(mdp::FactoredRMDP) = mdp.transition initial_states(mdp::FactoredRMDP) = mdp.initial_states source_shape(m::FactoredRMDP) = m.source_dims diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl index 868969f4..6f9978f0 100644 --- a/src/models/IntervalMarkovChain.jl +++ b/src/models/IntervalMarkovChain.jl @@ -17,7 +17,16 @@ function IntervalMarkovChain(marginal::Marginal{<:IntervalAmbiguitySets}, initia ) end +""" + IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) +A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov chain +from a single [`IntervalAmbiguitySets`](@ref) object. See [IMCs](@ref) for the formal definition. + +!!! todo + Add example + +""" function IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) source_dims = (num_sets(ambiguity_set),) action_vars = (1,) diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index e6d4ccc7..15a54d03 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -13,6 +13,16 @@ function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySet ) end +""" + IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) + +A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process +from a single [`IntervalAmbiguitySets`](@ref) object and a specified number of actions. See [IMDPs](@ref) for the formal definition. + +!!! 
todo + Add example + +""" function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) if num_sets(ambiguity_set) % num_actions != 0 throw(ArgumentError("The number of sets in the ambiguity set must be a multiple of the number of actions.")) @@ -25,6 +35,17 @@ function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num return IntervalMarkovDecisionProcess(marginal, initial_states) end +""" + IntervalMarkovDecisionProcess(ps::Vector{<:IntervalAmbiguitySets}, initial_states::InitialStates = AllStates()) + +A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process +from a vector of [`IntervalAmbiguitySets`](@ref) objects, one for each state and with the same number of actions in each. +See [IMDPs](@ref) for the formal definition. + +!!! todo + Add example + +""" function IntervalMarkovDecisionProcess( ps::Vector{<:IntervalAmbiguitySets}, initial_states::InitialStates = AllStates(), diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index ef43ae2f..23fd2fc8 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -5,7 +5,7 @@ L <: AbstractLabelling, } -A type representing the product between interval Markov processes (e.g. [`IntervalMarkovDecisionProcess`](@ref) or [`OrthogonalIntervalMarkovDecisionProcess`](@ref)) +A type representing the product between interval Markov processes (e.g. [`FactoredRobustMarkovDecisionProcess`](@ref)) and an automaton (typically a deterministic finite automaton [`DFA`](@ref)). 
Formally, given an interval Markov process ``M = (S, A, \\Gamma, S_{0})``, a labelling function ``L : S \\to 2^{AP}``, and a DFA ``D = (Q, 2^{AP}, \\delta, q_{0}, Q_{ac})``, @@ -17,7 +17,7 @@ then a product process is a tuple ``M_{prod} = (Z, A, \\Gamma^{prod}, Z_{ac}, Z_ - ``\\Gamma^{prod} = \\{\\Gamma^{prod}_{z,a}\\}_{z \\in Z, a \\in A}`` where ``\\Gamma^{prod}_{z,a} = \\{ \\gamma_{z,a} : \\gamma_{z,a}((t, z')) = \\gamma_{s,a}(t)\\delta_{q,L(s)}(z') \\}`` is a set of ambiguity sets on the product transition probabilities, for each product source-action pair. -See [`IntervalMarkovDecisionProcess`](@ref) and [`DFA`](@ref) for more information on the structure, definition, and usage of the DFA and IMDP. +See [`FactoredRobustMarkovDecisionProcess`](@ref) and [`DFA`](@ref) for more information on the structure, definition, and usage of the DFA and IMDP. ### Fields - `mdp::M`: contains details for the interval Markov process. diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 1ca0739f..027ba66f 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -1,5 +1,8 @@ """ - IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}, N, M, I} + IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} + +!!! todo + Update description A matrix pair to represent the lower and upper bound transition probabilities from all source/action pairs to all target states. The matrices can be `Matrix{R}` or `SparseMatrixCSC{R}`, or their CUDA equivalents. For memory efficiency, it is recommended to use sparse matrices. @@ -9,12 +12,11 @@ Mathematically, let ``P`` be the probability matrix. Then ``P_{ij}`` represents Due to the column-major format of Julia, this is also a more efficient representation (in terms of cache locality). The lower bound is explicitly stored, while the upper bound is computed from the lower bound and the gap. 
This choice is -because it simplifies repeated probability assignment using O-maximization [1]. +because it simplifies repeated probability assignment using O-maximization [givan2000bounded, lahijanian2015formal](@cite). ### Fields - `lower::MR`: The lower bound transition probabilities from a source state or source/action pair to a target state. - `gap::MR`: The gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. -- `sum_lower::VR`: The sum of lower bound transition probabilities from a source state or source/action pair to all target states. ### Examples ```jldoctest @@ -35,8 +37,6 @@ sparse_prob = IntervalAmbiguitySets(; ) ``` -[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. - """ struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} <: PolytopicAmbiguitySets lower::MR @@ -171,8 +171,20 @@ function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMat end end +""" + num_target(ambiguity_set::IntervalAmbiguitySets) + +Return the number of target states in the IntervalAmbiguitySets object. +""" num_target(p::IntervalAmbiguitySets) = size(p.lower, 1) + +""" + num_sets(ambiguity_set::IntervalAmbiguitySets) + +Return the number of ambiguity sets in the IntervalAmbiguitySets object. +""" num_sets(p::IntervalAmbiguitySets) = size(p.lower, 2) + source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) action_shape(::IntervalAmbiguitySets) = (1,) marginals(p::IntervalAmbiguitySets) = (p,) @@ -233,12 +245,27 @@ end num_target(p::IntervalAmbiguitySet) = length(p.lower) +""" + lower(p::IntervalAmbiguitySet) + +Return the lower bound transition probabilities of the ambiguity set to all target states. 
+""" lower(p::IntervalAmbiguitySet) = p.lower lower(p::IntervalAmbiguitySet, destination) = p.lower[destination] +""" + upper(p::IntervalAmbiguitySet) + +Return the upper bound transition probabilities of the ambiguity set to all target states. +""" upper(p::IntervalAmbiguitySet) = p.lower + p.gap upper(p::IntervalAmbiguitySet, destination) = p.lower[destination] + p.gap[destination] +""" + gap(p::IntervalAmbiguitySet) + +Return the gap between upper and lower bound transition probabilities of the ambiguity set to all target states. +""" gap(p::IntervalAmbiguitySet) = p.gap gap(p::IntervalAmbiguitySet, destination) = p.gap[destination] diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index d759c641..1d446d15 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -1,3 +1,17 @@ +""" + Marginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndices} + +A struct to represent the dependency graph of an fRMDP, namely by subselecting (in `getindex`) the (decomposed) +state and action. Furthermore, the struct is responsible for converting the Cartesian index to a linear index for +the underlying ambiguity sets. + +!!! todo + Describe source_dims + +!!! todo + Add example + +""" struct Marginal{A <: AbstractAmbiguitySets, N, M, I <: LinearIndices} ambiguity_sets::A @@ -72,14 +86,58 @@ function checkindices(ambiguity_sets, state_indices, action_indices, source_dims end end +""" + ambiguity_sets(p::Marginal) + +Return the underlying ambiguity sets of the marginal. +""" ambiguity_sets(p::Marginal) = p.ambiguity_sets + +""" + state_variables(p::Marginal) + +Return the state variable indices of the marginal. +""" state_variables(p::Marginal) = p.state_indices + +""" + action_variables(p::Marginal) + +Return the action variable indices of the marginal. +""" action_variables(p::Marginal) = p.action_indices + +""" + source_shape(p::Marginal) + +Return the shape of the source (state) variables of the marginal. 
The [`FactoredRobustMarkovDecisionProcess`](@ref) +checks if this is less than or equal to the corresponding state variables. +""" source_shape(p::Marginal) = p.source_dims + +""" + action_shape(p::Marginal) + +Return the shape of the action variables of the marginal. The [`FactoredRobustMarkovDecisionProcess`](@ref) +checks if this is equal to the corresponding action variables. +""" action_shape(p::Marginal) = p.action_vars + +""" + num_target(p::Marginal) + +Return the number of target states of the marginal. +""" num_target(p::Marginal) = num_target(ambiguity_sets(p)) -Base.getindex(p::Marginal, source, action) = ambiguity_sets(p)[sub2ind(p, source, action)] +""" + getindex(p::Marginal, action, source) + +Get the ambiguity set corresponding to the given `source` (state) and `action`, where +the relevant indices of `source` and `action` are selected by `p.action_indices` and `p.state_indices` respectively. +The selected index is then converted to a linear index for the underlying ambiguity sets. 
+""" +Base.getindex(p::Marginal, action, source) = ambiguity_sets(p)[sub2ind(p, action, source)] sub2ind(p::Marginal, action::CartesianIndex, source::CartesianIndex) = sub2ind(p, Tuple(action), Tuple(source)) function sub2ind(p::Marginal, action::NTuple{M, T}, source::NTuple{N, T}) where {N, M, T <: Integer} diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index 9bf58897..fa2b1cc9 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -42,7 +42,7 @@ isinterval(::IntervalAmbiguitySets) = IsInterval() # Marginals include("Marginal.jl") -export SARectangularMarginal, Marginal, ambiguity_sets, state_variables, action_variables, source_shape, action_shape, num_target +export Marginal, ambiguity_sets, state_variables, action_variables, source_shape, action_shape, num_target ispolytopic(marginal::Marginal) = ispolytopic(ambiguity_sets(marginal)) isinterval(marginal::Marginal) = isinterval(ambiguity_sets(marginal)) From f32d6df95cc767b87e4ce4533642d495faf9ef73 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 22:49:54 +0200 Subject: [PATCH 26/71] Fix mathematical definition of fIMDPs --- docs/src/models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/models.md b/docs/src/models.md index 47997332..15cf5f3c 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -69,7 +69,7 @@ Formally, an fIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \math - ``S = S_1 \times \cdots \times S_n`` is a finite set of joint states with ``S_i`` being a finite set of states for the ``i``-th marginal, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, -- ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges 
``\mathcal{E} \subseteq \mathcal{V} \times \mathcal{V}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, +- ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \mathcal{V}_{ind} \cup \mathcal{V}_{cond} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V}_{ind} \times \mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, - ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` with ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``, conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``. 
## References From 61a657215b2399a7d835a12de11fc05f35f29f54 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 22:58:43 +0200 Subject: [PATCH 27/71] Add eprint where available --- docs/src/refs.bib | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/src/refs.bib b/docs/src/refs.bib index 86c0b2d7..500a0a38 100644 --- a/docs/src/refs.bib +++ b/docs/src/refs.bib @@ -68,7 +68,8 @@ @incollection{suilen2024robust booktitle={Principles of Verification: Cycling the Probabilistic Landscape: Essays Dedicated to Joost-Pieter Katoen on the Occasion of His 60th Birthday, Part III}, pages={126--154}, year={2024}, - publisher={Springer} + publisher={Springer}, + eprint={2411.11451} } @inproceedings{mathiesen2025scalable, @@ -76,12 +77,14 @@ @inproceedings{mathiesen2025scalable author={Mathiesen, Frederik Baymler and Haesaert, Sofie and Laurenti, Luca}, booktitle={Proceedings of the 28th ACM International Conference on Hybrid Systems: Computation and Control}, pages={1--12}, - year={2025} + year={2025}, + eprint={2411.11803} } @article{schnitzer2025efficient, title={Efficient Solution and Learning of Robust Factored MDPs}, author={Schnitzer, Yannik and Abate, Alessandro and Parker, David}, journal={arXiv preprint arXiv:2508.00707}, - year={2025} + year={2025}, + eprint={2508.00707} } From c2e88cf472701e966e1b06c7b2cc7cbfc0da036a Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 23:00:45 +0200 Subject: [PATCH 28/71] Clarify specification --- docs/src/index.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index d594e668..85dfae41 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -32,8 +32,9 @@ cache-friendly algorithms. See [Specifications](@ref) for more information on th !!! 
info We use the nomenclature "property" to refer to goal, which defines both how the value function - is initialized and how it is updated after every Bellman iteration, and "specification" refers to whether - to minimize or maximize either the lower bound (pessimistic) or the upper bound (optimistic) of the value function. + is initialized and how it is updated after every Bellman iteration, and "specification" refers to a property + and whether to minimize or maximize either the lower bound (pessimistic) or the upper bound (optimistic) of + the value function. #### Features - Value iteration over IMCs, IMDPs, odIMDPs, and fIMDPs. From 05ef452ddaf82e2fb5a58c445923bb479605247d Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 23:16:54 +0200 Subject: [PATCH 29/71] Add construction examples to models.md --- docs/src/models.md | 128 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/docs/src/models.md b/docs/src/models.md index 15cf5f3c..ed543c47 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -29,6 +29,49 @@ Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \mathcal{G}, \Gamma)``, whe A path of an fRMDP is a sequence of states and actions ``\omega = (s[0], a[0]), (s[1], a[1]), \dots`` where ``(s[k], a[k]) \in S \times A`` for all ``k \in \mathbb{N}_0``. We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. A _strategy_ or _policy_ for an fRMDP is a function ``\pi : \Omega_{fin} \to A`` that assigns an action, given a (finite) path called the history. _Time-dependent_ Markov strategies are functions from state and time step to an action, i.e. ``\pi : S \times \mathbb{N}_0 \to A``. This can equivalently be described as a sequence of functions indexed by time ``\mathbf{\pi} = (\pi[0], \pi[1], \ldots)``. 
If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. The focus of this package is on dynamic uncertainties where the choice of the adversary is resolved at every time step, called dynamic uncertainty, and where the adversary has access to both the current state and action, called ``(s, a)``-rectangularity. We refer to [suilen2024robust](@cite) for further details on the distinction between static and dynamic uncertainties, types of rectangularity, and their implications. Given a strategy and an adversary, an fRMDP collapses to a finite (factored) Markov chain. +Below is an example of how to construct an fRMDP with 2 state variables (2 and 3 values respectively) and 2 action variables (1 and 2 values respectively), where each marginal ambiguity set is an interval ambiguity set. The first marginal depends on both state variables and the first action variable, while the second marginal only depends on the second state variable and the second action variable. 
+```julia +using IntervalMDP + +state_vars = (2, 3) +action_vars = (1, 2) + +state_indices = (1, 2) +action_indices = (1,) +state_dims = (2, 3) +action_dims = (1,) +marginal1 = Marginal(IntervalAmbiguitySets(; + lower = [ # 6 ambiguity sets = 2 * 3 source states, 1 action + 1/15 7/30 1/15 13/30 4/15 1/6 + 2/5 7/30 1/30 11/30 2/15 1/10 + ], + upper = [ + 17/30 7/10 2/3 4/5 7/10 2/3 + 9/10 13/15 9/10 5/6 4/5 14/15 + ] +), state_indices, action_indices, state_dims, action_dims) + +state_indices = (2,) +action_indices = (2,) +state_dims = (3,) +action_dims = (2,) +marginal2 = Marginal(IntervalAmbiguitySets(; + lower = [ # 6 ambiguity sets = 3 source states, 2 actions + 1/30 1/3 1/6 1/15 2/5 2/15 + 4/15 1/4 1/6 1/30 2/15 1/30 + 2/15 7/30 1/10 7/30 7/15 1/5 + ], + upper = [ + 2/3 7/15 4/5 11/30 19/30 1/2 + 23/30 4/5 23/30 3/5 7/10 8/15 + 7/15 4/5 23/30 7/10 7/15 23/30 + ] +), state_indices, action_indices, state_dims, action_dims) + +initial_states = [(1, 1)] # Initial states are optional +mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2), initial_states) +``` + ## IMCs Interval Markov Chains (IMCs) [delahaye2011decision](@cite) are a subclass of fRMDPs and a generalization of Markov Chains (MCs), where the transition probabilities are not known exactly, but they are constrained to be in some probability interval. Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where @@ -39,6 +82,27 @@ Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where An IMC is equivalent to an fRMDP where there is only one state variable, no action variables, and the ambiguity sets are interval ambiguity sets. The dependency graph is just two nodes ``S`` and ``S'`` with a single edge from the former to the latter. Paths and adversaries are defined similarly to fRMDPs. 
+Example: +```julia +using IntervalMDP + +prob = IntervalAmbiguitySets(; + lower = N[ + 0 1/2 0 + 1/10 3/10 0 + 1/5 1/10 1 + ], + upper = N[ + 1/2 7/10 0 + 3/5 1/2 0 + 7/10 3/10 1 + ], +) + +initial_states = [1] # Initial states are optional +mc = IntervalMarkovChain(prob, initial_states) +``` + ## IMDPs Interval Markov Decision Processes (IMDPs) [givan2000bounded, lahijanian2015formal](@cite), also called bounded-parameter MDPs, are a subclass of fRMDPs and a generalization of MDPs, where the transition probabilities, given source state and action, are not known exactly, but they are constrained to be in some probability interval. IMDPs generalized IMCs by adding actions. Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \Gamma)``, where @@ -50,6 +114,70 @@ Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \Gamma)``, where An IMDP is equivalent to an fRMDP where there is only one state variable, one action variable, and the ambiguity sets are interval ambiguity sets. The dependency graph is three nodes ``S``, ``A``, and ``S'`` with two edges ``S \rightarrow S'`` and ``A \rightarrow S'``. Paths and adversaries are defined similarly to fRMDPs. 
+Example: +```julia +using IntervalMDP + +prob1 = IntervalAmbiguitySets(; + lower = [ + 0 1/2 + 1/10 3/10 + 1/5 1/10 + ], + upper = [ + 1/2 7/10 + 3/5 1/2 + 7/10 3/10 + ], +) + +prob2 = IntervalAmbiguitySets(; + lower = [ + 1/10 1/5 + 1/5 3/10 + 3/10 2/5 + ], + upper = [ + 3/5 3/5 + 1/2 1/2 + 2/5 2/5 + ], +) + +prob3 = IntervalAmbiguitySets(; + lower = [ + 0 0 + 0 0 + 1 1 + ], + upper = [ + 0 0 + 0 0 + 1 1 + ] +) + +initial_states = [1] +mdp = IntervalMarkovDecisionProcess([prob1, prob2, prob3], initial_states) + +# alternatively +prob = IntervalAmbiguitySets(; + lower = [ + 0 1/2 1/10 1/5 0 0 + 1/10 3/10 1/5 3/10 0 0 + 1/5 1/10 3/10 2/5 1 1 + ], + upper = [ + 1/2 7/10 3/5 2/5 0 0 + 3/5 1/2 1/2 2/5 0 0 + 7/10 3/10 2/5 2/5 1 1 + ], +) + +num_actions = 2 +mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) +``` + ## odIMDPs Orthogonally-decoupled IMDPs (odIMDPs) [mathiesen2025scalable](@cite) are a subclass of fRMDPs designed to be more memory-efficient than IMDPs. The states are structured into an orthogonal, or grid-based, decomposition and the transition probability ambiguity sets, for each source-action pair, as a product of interval ambiguity sets along each marginal. 
From a7298d9d40466007bbf89607693c6934c9cb3995 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 18 Sep 2025 23:21:44 +0200 Subject: [PATCH 30/71] Add implicit self-loop example --- docs/src/models.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/src/models.md b/docs/src/models.md index ed543c47..4d185a55 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -163,7 +163,7 @@ mdp = IntervalMarkovDecisionProcess([prob1, prob2, prob3], initial_states) # alternatively prob = IntervalAmbiguitySets(; lower = [ - 0 1/2 1/10 1/5 0 0 + 0 1/2 1/10 1/5 0 0 1/10 3/10 1/5 3/10 0 0 1/5 1/10 3/10 2/5 1 1 ], @@ -178,6 +178,29 @@ num_actions = 2 mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) ``` +It is possible to skip defining actions when the transition is a guaranteed self-loop and is the last states in the ambiguity set. +This is useful for defining target states in reachability problems. The example below has 3 states (as shown by the 3 rows) and 2 actions +(explictly defined by `num_actions = 2`). The last state is a target state with a guaranteed self-loop, i.e., the transition probabilities are ``P(3|3,a) = 1`` for both actions ``a \in \{1, 2\}``. +```julia +using IntervalMDP + +prob = IntervalAmbiguitySets(; + lower = [ + 0 1/2 1/10 1/5 + 1/10 3/10 1/5 3/10 + 1/5 1/10 3/10 2/5 + ], + upper = [ + 1/2 7/10 3/5 2/5 + 3/5 1/2 1/2 2/5 + 7/10 3/10 2/5 2/5 + ], +) + +num_actions = 2 +mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) +``` + ## odIMDPs Orthogonally-decoupled IMDPs (odIMDPs) [mathiesen2025scalable](@cite) are a subclass of fRMDPs designed to be more memory-efficient than IMDPs. The states are structured into an orthogonal, or grid-based, decomposition and the transition probability ambiguity sets, for each source-action pair, as a product of interval ambiguity sets along each marginal. 
From 4dcc23b67c2ffef900cf4285c665a11cf02a73cd Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Fri, 19 Sep 2025 15:49:46 +0200 Subject: [PATCH 31/71] Print outputs in docs --- docs/src/models.md | 28 +++++++++++---------- docs/src/reference/systems.md | 2 +- src/models/IntervalMarkovDecisionProcess.jl | 4 +-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/src/models.md b/docs/src/models.md index 4d185a55..abbf3dfe 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -30,8 +30,8 @@ A path of an fRMDP is a sequence of states and actions ``\omega = (s[0], a[0]), A _strategy_ or _policy_ for an fRMDP is a function ``\pi : \Omega_{fin} \to A`` that assigns an action, given a (finite) path called the history. _Time-dependent_ Markov strategies are functions from state and time step to an action, i.e. ``\pi : S \times \mathbb{N}_0 \to A``. This can equivalently be described as a sequence of functions indexed by time ``\mathbf{\pi} = (\pi[0], \pi[1], \ldots)``. If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. The focus of this package is on dynamic uncertainties where the choice of the adversary is resolved at every time step, called dynamic uncertainty, and where the adversary has access to both the current state and action, called ``(s, a)``-rectangularity. We refer to [suilen2024robust](@cite) for further details on the distinction between static and dynamic uncertainties, types of rectangularity, and their implications. Given a strategy and an adversary, an fRMDP collapses to a finite (factored) Markov chain. Below is an example of how to construct an fRMDP with 2 state variables (2 and 3 values respectively) and 2 action variables (1 and 2 values respectively), where each marginal ambiguity set is an interval ambiguity set. 
The first marginal depends on both state variables and the first action variable, while the second marginal only depends on the second state variable and the second action variable. -```julia -using IntervalMDP +```@example +using IntervalMDP # hide state_vars = (2, 3) action_vars = (1, 2) @@ -83,16 +83,16 @@ Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where An IMC is equivalent to an fRMDP where there is only one state variable, no action variables, and the ambiguity sets are interval ambiguity sets. The dependency graph is just two nodes ``S`` and ``S'`` with a single edge from the former to the latter. Paths and adversaries are defined similarly to fRMDPs. Example: -```julia -using IntervalMDP +```@example +using IntervalMDP # hide prob = IntervalAmbiguitySets(; - lower = N[ + lower = [ 0 1/2 0 1/10 3/10 0 1/5 1/10 1 ], - upper = N[ + upper = [ 1/2 7/10 0 3/5 1/2 0 7/10 3/10 1 @@ -115,8 +115,8 @@ Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \Gamma)``, where An IMDP is equivalent to an fRMDP where there is only one state variable, one action variable, and the ambiguity sets are interval ambiguity sets. The dependency graph is three nodes ``S``, ``A``, and ``S'`` with two edges ``S \rightarrow S'`` and ``A \rightarrow S'``. Paths and adversaries are defined similarly to fRMDPs. Example: -```julia -using IntervalMDP +```@example +using IntervalMDP # hide prob1 = IntervalAmbiguitySets(; lower = [ @@ -145,12 +145,12 @@ prob2 = IntervalAmbiguitySets(; ) prob3 = IntervalAmbiguitySets(; - lower = [ + lower = Float64[ 0 0 0 0 1 1 ], - upper = [ + upper = Float64[ 0 0 0 0 1 1 @@ -181,8 +181,8 @@ mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) It is possible to skip defining actions when the transition is a guaranteed self-loop and is the last states in the ambiguity set. This is useful for defining target states in reachability problems. 
The example below has 3 states (as shown by the 3 rows) and 2 actions (explictly defined by `num_actions = 2`). The last state is a target state with a guaranteed self-loop, i.e., the transition probabilities are ``P(3|3,a) = 1`` for both actions ``a \in \{1, 2\}``. -```julia -using IntervalMDP +```@example +using IntervalMDP # hide prob = IntervalAmbiguitySets(; lower = [ @@ -198,7 +198,7 @@ prob = IntervalAmbiguitySets(; ) num_actions = 2 -mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) +mdp = IntervalMarkovDecisionProcess(prob, num_actions) ``` ## odIMDPs @@ -223,6 +223,8 @@ Formally, an fIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \math - ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \mathcal{V}_{ind} \cup \mathcal{V}_{cond} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V}_{ind} \times \mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, - ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` with ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``, conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``. +The example in [Factored RMDPs](@ref) is also an example of an fIMDP. 
+ ## References ```@bibliography Pages = ["models.md"] diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 58ec2fb9..0876b13b 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -8,7 +8,7 @@ initial_states(mp::IntervalMarkovProcess) AllStates ``` -## Factored RMDPs +## [Factored RMDPs](@id api-frmdp) ```@docs FactoredRobustMarkovDecisionProcess state_variables(s::FactoredRobustMarkovDecisionProcess) diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index 15a54d03..4908e82f 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -47,9 +47,9 @@ See [IMDPs](@ref) for the formal definition. """ function IntervalMarkovDecisionProcess( - ps::Vector{<:IntervalAmbiguitySets}, + ps::Vector{<:IntervalAmbiguitySets{R, MR}}, initial_states::InitialStates = AllStates(), -) +) where {R, MR <: AbstractMatrix{R}} marginal = interval_prob_hcat(ps) return IntervalMarkovDecisionProcess(marginal, initial_states) end From c97abe3200c3b6d87a2d5909cf7593a51b5b1932 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 20 Sep 2025 21:30:22 +0200 Subject: [PATCH 32/71] Begin documenting specifications --- docs/src/index.md | 6 -- docs/src/models.md | 5 +- docs/src/specifications.md | 205 ++++++++++++++++++++++++++++++++++--- 3 files changed, 196 insertions(+), 20 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 85dfae41..4e87426e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -30,12 +30,6 @@ reward, and expected hitting times, and over finite and infinite horizons. For m the package supports Deterministic Finite Automata (DFA), with lazy product construction and efficient, cache-friendly algorithms. See [Specifications](@ref) for more information on the supported specifications. -!!! 
info - We use the nomenclature "property" to refer to goal, which defines both how the value function - is initialized and how it is updated after every Bellman iteration, and "specification" refers to a property - and whether to minimize or maximize either the lower bound (pessimistic) or the upper bound (optimistic) of - the value function. - #### Features - Value iteration over IMCs, IMDPs, odIMDPs, and fIMDPs. - Plenty of built-in specifications including reachability, safety, reach-avoid, discounted reward, and expected hitting times. diff --git a/docs/src/models.md b/docs/src/models.md index 4d185a55..25bb2ceb 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -1,6 +1,6 @@ # Models #### Mathematical Notation -We denote the natural numbers by ``\mathbb{N}`` and ``\mathbb{N}_0 = \mathbb{N} \cup \{0\}``. A probability distribution ``\gamma`` over a finite set ``S`` is a function ``\gamma : S \to [0, 1]`` satisfying ``\sum_{s \in S} \gamma(s) = 1``. We denote by ``\mathcal{D}(S)`` the set of all probability distributions over ``S``. +We denote the natural numbers by ``\mathbb{N}`` and ``\mathbb{N}_0 = \mathbb{N} \cup \{0\}``. A probability distribution ``\gamma`` over a finite set ``S`` is a function ``\gamma : S \to [0, 1]`` satisfying ``\sum_{s \in S} \gamma(s) = 1``. The support of the distribution ``\mathop{supp}(\gamma)`` is defined as ``\mathop{supp}(\gamma) = \{ s \in S : \gamma(s) > 0\}``. We denote by ``\mathcal{D}(S)`` the set of all probability distributions over ``S``. For ``\underline{\gamma}, \overline{\gamma} : S \to [0, 1]`` such that ``\underline{\gamma}(s) \leq \overline{\gamma}(s)`` for each ``s \in S`` and ``\sum_{s \in S} \underline{\gamma}(s) \leq 1 \leq \sum_{s \in S} \overline{\gamma}(s)``, an interval ambiguity set ``\Gamma \subset \mathcal{D}(S)`` is the set of distributions such that ```math \Gamma = \{ \gamma \in \mathcal{D}(S) \,:\, \underline{\gamma}(s) \leq \gamma(s) \leq \overline{\gamma}(s) \text{ for each } s\in S \}. 
@@ -26,7 +26,8 @@ Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \mathcal{G}, \Gamma)``, whe \Gamma_{s,a} = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(s') = \prod_{i=1}^n \gamma^i(s'_i | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}), \, \gamma^i(\cdot | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}) \in \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i)} \right\}. ``` -A path of an fRMDP is a sequence of states and actions ``\omega = (s[0], a[0]), (s[1], a[1]), \dots`` where ``(s[k], a[k]) \in S \times A`` for all ``k \in \mathbb{N}_0``. We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. +For a given source-action pair ``(s, a) \in S \times A``, any distribution ``\gamma_{s, a} \in \Gamma_{s,a}`` is called a feasible distribution, and feasible transitions are triplets ``(s, a, s') \in S \times A \times S`` where ``s' \in \mathop{supp}(\gamma_{s, a})`` for any feasible distribution ``\gamma_{s, a} \in \Gamma_{s, a}``. A path of an fRMDP is a sequence of states and actions ``\omega = s[0], a[0], s[1], a[1], \dots`` where ``s[k] \in S`` and ``a[k] \in A`` for all ``k \in \mathbb{N}_0``, and ``(s[k], a[k], s[k + 1])`` is a feasible transition for all ``k \in \mathbb{N}_0``. We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. + A _strategy_ or _policy_ for an fRMDP is a function ``\pi : \Omega_{fin} \to A`` that assigns an action, given a (finite) path called the history. _Time-dependent_ Markov strategies are functions from state and time step to an action, i.e. ``\pi : S \times \mathbb{N}_0 \to A``. This can equivalently be described as a sequence of functions indexed by time ``\mathbf{\pi} = (\pi[0], \pi[1], \ldots)``. 
If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. The focus of this package is on dynamic uncertainties where the choice of the adversary is resolved at every time step, called dynamic uncertainty, and where the adversary has access to both the current state and action, called ``(s, a)``-rectangularity. We refer to [suilen2024robust](@cite) for further details on the distinction between static and dynamic uncertainties, types of rectangularity, and their implications. Given a strategy and an adversary, an fRMDP collapses to a finite (factored) Markov chain. Below is an example of how to construct an fRMDP with 2 state variables (2 and 3 values respectively) and 2 action variables (1 and 2 values respectively), where each marginal ambiguity set is an interval ambiguity set. The first marginal depends on both state variables and the first action variable, while the second marginal only depends on the second state variable and the second action variable. diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 221715a9..7a0a0be1 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -1,25 +1,206 @@ # Specifications +Specifications are comprised of a property and whether to minimize or maximize either the lower bound (pessimistic) or the upper bound (optimistic) of the value function. The property, or goal, e.g. reachability and reach-avoid, defines both how the value function is initialized and how it is updated after every Bellman iteration. The property also defines whether the horizon is finite or infinite, which impacts the stopping criteria and the resulting strategy type.
In particular, for the infinite horizon, the model checking algorithm continues until a convergence threshold is met and the strategy, if performing control synthesis, is stationary, while for a finite horizon, the strategy is time varying. -## Reachability -In this formal framework, we can describe computing reachability given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` as the following objective +!!! note + The adversary is never synthesized directly and is always considered time-varying and dynamic. Over the infinite horizon, similar to the strategy, a time-varying adversary at convergence coincides with a stationary and static adversary (CITE). Without loss of generality below, we assume that the adversary ``\eta`` and strategy ``\pi`` are given. +!!! todo + Add the proper citation everywhere + +As an example of constructing the specification, we consider here a reachability specification for an IMDP. +```@example +using IntervalMDP # hide + +time_horizon = 10 +prop = FiniteTimeReachability([3, 9, 10], time_horizon) + +spec = Specification(prop) # Default: Pessimistic, Maximize + +# Explicit satisfaction mode (pessimistic/optimistic) +spec = Specification(prop, Pessimistic) # Default: Maximize, useful for Markov chains +spec = Specification(prop, Optimistic) + +# Explicit strategy mode (minimize/maximize) +spec = Specification(prop, Pessimistic, Maximize) +spec = Specification(prop, Pessimistic, Minimize) # Unusual, but available +spec = Specification(prop, Optimistic, Maximize) # Unusual, but available +spec = Specification(prop, Optimistic, Minimize) +``` + + +## Simple properties +In the sections below, we will enumerate the possible simple properties (meaning no task automaton required), their equivalence to some value function, and how to construct them. 
For complex properties and how to construct task automata see [Complex properties](@ref), + +### Reachability +Given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` reachability is the following objective ```math -{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{P}_{\pi,\eta }\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right], +\mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \exists k \in \{0, \ldots, K\}, \, \omega[k] \in G \right]. ``` -where ``\mathop{opt}^{\pi},\mathop{opt}^{\eta} \in \{\min, \max\}`` and ``\mathbb{P}_{\pi,\eta }`` is the probability of the Markov chain induced by strategy ``\pi`` and adversary ``\eta``. -When ``\mathop{opt}^{\eta} = \min``, the solution is called optimal _pessimistic_ probability (or reward), and conversely is called optimal _optimistic_ probability (or reward) when ``\mathop{opt}^{\eta} = \max``. -The choice of the min/max for the action and pessimistic/optimistic probability depends on the application. +The property is equivalent to the following value function +```math + \begin{aligned} + V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ + V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + \end{aligned} +``` +such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. -## Discounted reward -Discounted reward is similar to reachability but instead of a target set, we have a reward function ``r: S \to \mathbb{R}`` and a discount factor ``\gamma \in (0, 1)``. 
The objective is then +Example: +```@example +using IntervalMDP # hide +# Finite horizon +time_horizon = 10 + +# Example with a single state variable +prop = FiniteTimeReachability([3, 9, 10], time_horizon) # Single state variable only +prop = FiniteTimeReachability([(3,), (9,), (10,)], time_horizon) # Format available for multiple state variables + +# Example with 3 state variables +prop = FiniteTimeReachability([(4, 3, 9)], time_horizon) + +# Infinite horizon +convergence_threshold = 1e-8 +prop = InfiniteTimeReachability([3, 9, 10], convergence_threshold) +``` +In addition to finite and infinite horizon reachability, we also define _exact_ time reachability, which is the following property ```math -{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{E}_{\pi,\eta }\left[\sum_{k=0}^{K} \gamma^k r(\omega(k)) \right]. +\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \omega[K] \in G \right], +``` +which is equivalent with the following value function +```math + \begin{aligned} + V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ + V^{\pi, \eta}_k(s) &= \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + \end{aligned} +``` +such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = V_K(s)`` for a horizon ``K \in \mathbb{N}``. + +This can be constructed similarly +```@example +using IntervalMDP # hide +time_horizon = 10 + +# Example with a single state variable +prop = ExactTimeReachability([3, 9, 10], time_horizon) # Single state variable only +prop = ExactTimeReachability([(3,), (9,), (10,)], time_horizon) # Format available for multiple state variables + +# Example with 3 state variables +prop = ExactTimeReachability([(4, 3, 9)], time_horizon) ``` -[1] Givan, Robert, Sonia Leach, and Thomas Dean. "Bounded-parameter Markov decision processes." Artificial Intelligence 122.1-2 (2000): 71-109. 
+### Reach-avoid +Given a target set ``G``, an avoid set ``O`` (with ``G \cap O = \emptyset``), and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` reach-avoid is the following objective +```math +\mathbb{P}^{\pi, \eta}_{\mathrm{reach-avoid}}(G, O, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \exists k \in \{0, \ldots, K\}, \, \omega[k] \in G, \; \forall k' \in \{0, \ldots, k \}, \, \omega[k'] \notin O \right]. +``` + +The property is equivalent to the following value function +```math + \begin{aligned} + V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ + V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus (G \cup O)}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + \end{aligned} +``` +such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{reach-avoid}}(G, O, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. + +Example: +```@example +using IntervalMDP # hide +# Finite horizon +time_horizon = 10 + +# Example with a single state variable +reach = [3, 9] +avoid = [10] +prop = FiniteTimeReachAvoid(reach, avoid, time_horizon) # Single state variable only -[2] Suilen, M., Badings, T., Bovy, E. M., Parker, D., & Jansen, N. (2024). Robust Markov Decision Processes: A Place Where AI and Formal Methods Meet. In Principles of Verification: Cycling the Probabilistic Landscape: Essays Dedicated to Joost-Pieter Katoen on the Occasion of His 60th Birthday, Part III (pp. 126-154). Cham: Springer Nature Switzerland. 
+reach = [(3,), (9,)] +avoid = [(10,)] +prop = FiniteTimeReachAvoid(reach, avoid, time_horizon) # Format available for multiple state variables + +# Example with 3 state variables +reach = [(4, 3, 9)] +avoid = [(1, 1, 9)] +prop = FiniteTimeReachAvoid(reach, avoid, time_horizon) + +# Infinite horizon +convergence_threshold = 1e-8 +prop = InfiniteTimeReachAvoid(reach, avoid, convergence_threshold) +``` + +We also define _exact_ time reach-avoid, which is the following property +```math +\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach-avoid}}(G, O, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \omega[K] \in G, \; \forall k \in \{0, \ldots, K\}, \, \omega[k] \notin O \right], +``` +which is equivalent with the following value function +```math + \begin{aligned} + V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ + V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s)\mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + \end{aligned} +``` +such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = V_K(s)`` for a horizon ``K \in \mathbb{N}``. + +This can be constructed similarly +```@example +using IntervalMDP # hide +time_horizon = 10 + +# Example with a single state variable +reach = [3, 9] +avoid = [10] +prop = ExactTimeReachAvoid(reach, avoid, time_horizon) # Single state variable only + +reach = [(3,), (9,)] +avoid = [(10,)] +prop = ExactTimeReachAvoid(reach, avoid, time_horizon) # Format available for multiple state variables + +# Example with 3 state variables +reach = [(4, 3, 9)] +avoid = [(1, 1, 9)] +prop = ExactTimeReachAvoid(reach, avoid, time_horizon) +``` + +### Safety +Given an avoid set ``O`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` safety is the following objective +```math +\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(O, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \forall k \in \{0, \ldots, K\}, \, \omega[k] \notin O \right]. 
+``` +This property can by duality with reachability equivalently be states as ``\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(O, K) = 1 - \mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K)``. Note that if the strategy and adversary are not given, their optimization direction must be flipped in the dual objective. Alternatively, the property can be stated via the following value function +```math + \begin{aligned} + V^{\pi, \eta}_0(s) &= -\mathbf{1}_{O}(s)\\ + V^{\pi, \eta}_k(s) &= -\mathbf{1}_{O}(s) + \mathbf{1}_{S \setminus O}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + \end{aligned} +``` +such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(G, K) = 1 + V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. +The benefit of this formulation is that the optimization directions need not be flipped. + +Example: +```@example +using IntervalMDP # hide +# Finite horizon +time_horizon = 10 + +# Example with a single state variable +prop = FiniteTimeSafety([10], time_horizon) # Single state variable only +prop = FiniteTimeSafety([(10,)], time_horizon) # Format available for multiple state variables + +# Example with 3 state variables +prop = FiniteTimeSafety([(4, 3, 9)], time_horizon) + +# Infinite horizon +convergence_threshold = 1e-8 +prop = InfiniteTimeSafety([3, 9, 10], convergence_threshold) +``` + +### Discounted reward +Discounted reward is similar to reachability but instead of a target set, we have a reward function ``r: S \to \mathbb{R}`` and a discount factor ``\gamma \in (0, 1)``. The objective is then + +```math +{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{E}_{\pi,\eta }\left[\sum_{k=0}^{K} \gamma^k r(\omega(k)) \right]. +``` -[3] Mathiesen, F. B., Haesaert, S., & Laurenti, L. (2024). Scalable control synthesis for stochastic systems via structural IMDP abstractions. arXiv preprint arXiv:2411.11803. 
\ No newline at end of file +## Complex properties From 3c15a1704c34345ebfdc48c83dfa1b0e1f26a462 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 20 Sep 2025 21:31:14 +0200 Subject: [PATCH 33/71] Fix error in describing safety via duality --- docs/src/specifications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 7a0a0be1..05c2ca26 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -168,7 +168,7 @@ Given an avoid set ``O`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` safe ```math \mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(O, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \forall k \in \{0, \ldots, K\}, \, \omega[k] \notin O \right]. ``` -This property can by duality with reachability equivalently be states as ``\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(O, K) = 1 - \mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K)``. Note that if the strategy and adversary are not given, their optimization direction must be flipped in the dual objective. Alternatively, the property can be stated via the following value function +This property can by duality with reachability equivalently be stated as ``\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(O, K) = 1 - \mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(O, K)``. Note that if the strategy and adversary are not given, their optimization direction must be flipped in the dual objective.
Alternatively, the property can be stated via the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= -\mathbf{1}_{O}(s)\\ From 2caa749d5cd499a0e7b7225b3c28e1311e57b291 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 22 Sep 2025 22:24:51 +0200 Subject: [PATCH 34/71] Document complex properties --- docs/src/reference/systems.md | 1 - docs/src/references.md | 5 +- docs/src/refs.bib | 16 ++ docs/src/specifications.md | 216 ++++++++++++++++++++++-- src/models/DFA.jl | 37 ++-- src/models/ProductProcess.jl | 12 +- src/probabilities/Labelling.jl | 5 +- src/probabilities/TransitionFunction.jl | 2 +- src/specification.jl | 2 + 9 files changed, 256 insertions(+), 40 deletions(-) diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 0876b13b..248dde6c 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -30,7 +30,6 @@ num_labels(dfa::DFA) transition(dfa::DFA) labelmap(dfa::DFA) initial_state(dfa::DFA) -accepting_states(dfa::DFA) ProductProcess markov_process(proc::ProductProcess) automaton(proc::ProductProcess) diff --git a/docs/src/references.md b/docs/src/references.md index df106f26..6e968ea4 100644 --- a/docs/src/references.md +++ b/docs/src/references.md @@ -1,3 +1,6 @@ # Bibliography ```@bibliography -``` \ No newline at end of file +``` + +!!! todo + Fix citation newline and indentation issues. 
\ No newline at end of file diff --git a/docs/src/refs.bib b/docs/src/refs.bib index 500a0a38..a2d64866 100644 --- a/docs/src/refs.bib +++ b/docs/src/refs.bib @@ -20,6 +20,13 @@ @article{nilim2005robust publisher={INFORMS} } +@book{baier2008principles, + title={Principles of model checking}, + author={Baier, Christel and Katoen, Joost-Pieter}, + year={2008}, + publisher={MIT press} +} + @inproceedings{delahaye2011decision, title={Decision problems for interval Markov chains}, author={Delahaye, Beno{\^\i}t and Larsen, Kim G and Legay, Axel and Pedersen, Mikkel L and W{\k{a}}sowski, Andrzej}, @@ -40,6 +47,15 @@ @article{delgado2011efficient publisher={Elsevier} } +@inproceedings{de2013linear, + title={Linear Temporal Logic and Linear Dynamic Logic on Finite Traces.}, + author={De Giacomo, Giuseppe and Vardi, Moshe Y and others}, + booktitle={Ijcai}, + volume={13}, + pages={854--860}, + year={2013} +} + @article{wiesemann2013robust, title={Robust Markov decision processes}, author={Wiesemann, Wolfram and Kuhn, Daniel and Rustem, Ber{\c{c}}}, diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 05c2ca26..2f210dad 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -2,10 +2,7 @@ Specifications are compromised of a property and whether to minimize or maximize either the lower bound (pessimistic) or the upper bound (optimistic) ofthe value function. The property, or goal, e.g. reachability and reach-avoid, defines both how the value function is initialized and how it is updated after every Bellman iteration. The property also defines whether the horizon is finite or infinite, which impacts the stopping criteria and the resulting strategy type. In particular, for the infinite horizon, model checking algorithm continues until a convergence threshold is met and the strategy, if performing control synthesis, is stationary, while for a finite horizon, the strategy is time varying. !!! 
note - The adversary is never synthesized directly and is always considered time-varying and dynamic. Over the infinite horizon, similar to the strategy, a time-varying adversary at convergence coincides with a stationary and static adversary (CITE). Without loss of generality below, we assume that the adversary ``\eta`` and strategy ``\pi`` are given. - -!!! todo - Add the proper citation everywhere + The adversary is never synthesized directly and is always considered time-varying and dynamic. Over the infinite horizon, similar to the strategy, a time-varying adversary at convergence coincides with a stationary and static adversary [suilen2024robust](@cite). Without loss of generality below, we assume that the adversary ``\eta`` and strategy ``\pi`` are given. As an example of constructing the specification, we consider here a reachability specification for an IMDP. ```@example @@ -32,7 +29,7 @@ spec = Specification(prop, Optimistic, Minimize) In the sections below, we will enumerate the possible simple properties (meaning no task automaton required), their equivalence to some value function, and how to construct them. For complex properties and how to construct task automata see [Complex properties](@ref), ### Reachability -Given a target set ``G`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` reachability is the following objective +Given a target set ``G \subset S`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}``, reachability is the following objective ```math \mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \exists k \in \{0, \ldots, K\}, \, \omega[k] \in G \right]. 
``` @@ -41,7 +38,7 @@ The property is equivalent to the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. @@ -72,7 +69,7 @@ which is equivalent with the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = V_K(s)`` for a horizon ``K \in \mathbb{N}``. @@ -91,7 +88,7 @@ prop = ExactTimeReachability([(4, 3, 9)], time_horizon) ``` ### Reach-avoid -Given a target set ``G``, an avoid set ``O`` (with ``G \cap O = \emptyset``), and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` reach-avoid is the following objective +Given a target set ``G \subset S``, an avoid set ``O \subset S`` (with ``G \cap O = \emptyset``), and a horizon ``K \in \mathbb{N} \cup \{\infty\}``, reach-avoid is the following objective ```math \mathbb{P}^{\pi, \eta}_{\mathrm{reach-avoid}}(G, O, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \exists k \in \{0, \ldots, K\}, \, \omega[k] \in G, \; \forall k' \in \{0, \ldots, k' \}, \, \omega[k] \notin O \right]. 
``` @@ -100,7 +97,7 @@ The property is equivalent to the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus (G \cup O)}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus (G \cup O)}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{reach-avoid}}(G, O, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. @@ -138,7 +135,7 @@ which is equivalent with the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s)\mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s)\mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = V_K(s)`` for a horizon ``K \in \mathbb{N}``. @@ -164,7 +161,7 @@ prop = ExactTimeReachAvoid(reach, avoid, time_horizon) ``` ### Safety -Given an avoid set ``O`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}`` safety is the following objective +Given an avoid set ``O \subset S`` and a horizon ``K \in \mathbb{N} \cup \{\infty\}``, safety is the following objective ```math \mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(O, K) = \mathbb{P}^{\pi, \eta} \left[\omega \in \Omega : \forall k \in \{0, \ldots, K\}, \, \omega[k] \notin O \right]. 
``` @@ -172,7 +169,7 @@ This property can by duality with reachability equivalently be states as ``\math ```math \begin{aligned} V^{\pi, \eta}_0(s) &= -\mathbf{1}_{O}(s)\\ - V^{\pi, \eta}_k(s) &= -\mathbf{1}_{O}(s) + \mathbf{1}_{S \setminus O}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V_{k + 1}(s')] + V^{\pi, \eta}_k(s) &= -\mathbf{1}_{O}(s) + \mathbf{1}_{S \setminus O}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(G, K) = 1 + V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. @@ -197,10 +194,201 @@ prop = InfiniteTimeSafety([3, 9, 10], convergence_threshold) ``` ### Discounted reward -Discounted reward is similar to reachability but instead of a target set, we have a reward function ``r: S \to \mathbb{R}`` and a discount factor ``\gamma \in (0, 1)``. The objective is then +Given a (state) reward function ``r : S \to \mathbb{R}``, a discount factor ``\nu \in (0, 1)``, and a horizon ``K \in \mathbb{N} \cup \{\infty\}``, a (discounted) reward objective is then follow +```math +\mathbb{E}^{\pi,\eta}_{\mathrm{reward}}(r, \nu, K) = \mathbb{E}^{\pi,\eta}\left[\sum_{k=0}^{K} \nu^k r(\omega[k]) \right]. +``` +For a finite horizon, the discount factor is allowed to be ``\nu = 1``; for the infinite horizon, ``\nu < 1`` is required for convergence. +The property is equivalent to the following value function ```math -{\mathop{opt}\limits_{\pi}}^{\pi} \; {\mathop{opt}\limits_{\eta}}^{\eta} \; \mathbb{E}_{\pi,\eta }\left[\sum_{k=0}^{K} \gamma^k r(\omega(k)) \right]. + \begin{aligned} + V^{\pi, \eta}_0(s) &= r(s)\\ + V^{\pi, \eta}_k(s) &= r(s) + \nu \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + \end{aligned} +``` +such that ``\mathbb{E}^{\pi,\eta}_{\mathrm{reward}}(r, \nu, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. 
+ +Example: +```@example +using IntervalMDP # hide +# Finite horizon +time_horizon = 10 +discount_factor = 0.9 + +# Example with a single state variable +rewards = [0.0, 2.0, 1.0, -1.0] # For 4 states +prop = FiniteTimeReward(rewards, discount_factor, time_horizon) + +# Example with 2 state variables of 2 and 4 values respectively +rewards = [ + 0.0 2.0 1.0 -1.0; + 1.0 -1.0 0.0 2.0 +] +prop = FiniteTimeReward(rewards, discount_factor, time_horizon) + +# Infinite horizon +convergence_threshold = 1e-8 +prop = InfiniteTimeReward(rewards, discount_factor, convergence_threshold) +``` + +### Expected exit time +Given an avoid set ``O \subset S``, the expected exit time of the set ``S \setminus O`` is the following objective +```math +\mathbb{E}^{\pi,\eta}_{\mathrm{exit}}(O) = \mathbb{E}^{\pi,\eta}\left[k : \omega[k] \in O, \, \forall k' \in \{0, \ldots, k - 1\}, \, \omega[k'] \notin O \right]. +``` + +The property is equivalent to the following value function +```math + \begin{aligned} + V^{\pi, \eta}_0(s) &= \mathbf{1}_{S \setminus O}(s)\\ + V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s) \left(1 + \mathbb{E}_{s' \sim \eta(s, a)}[V^{\pi, \eta}_{k - 1}(s')]\right) + \end{aligned} +``` +such that ``\mathbb{E}^{\pi,\eta}_{\mathrm{exit}}(O) = V_\infty(s)``. The adversary does not depend on time. + +Example: +```@example +using IntervalMDP # hide + +convergence_threshold = 1e-8 + +# Example with a single state variable +avoid_states = [10] +prop = ExpectedExitTime(avoid_states, convergence_threshold) # Single state variable only + +avoid_states = [(10,)] +prop = ExpectedExitTime(avoid_states, convergence_threshold) # Format available for multiple state variables + +# Example with 3 state variables +avoid_states = [(4, 3, 9)] +prop = ExpectedExitTime(avoid_states, convergence_threshold) ``` ## Complex properties +For complex, temporal properties, it is necessary to use some form of automaton to express the property.
In this package, we support specifications via Deterministic Finite Automata (DFA), which via a lazy product construction with an fRMDP allows for efficient implementations of the Bellman operator. DFAs are an important class of task automata as they can express properties in syntactically co-safe Linear Temporal Logic (scLTL) [baier2008principles](@cite) and Linear Temporal Logic over finite traces (LTLf) [de2013linear](@cite). + +Formally, a DFA is a tuple ``\mathcal{A} = (Q, q_0, 2^{\mathrm{AP}}, \delta, F)`` where ``Q`` is a finite set of states, ``q_0 \in Q`` is the initial state, ``2^{\mathrm{AP}}`` is a finite alphabet derived from the atomic propositions ``\mathrm{AP}``, ``\delta : Q \times 2^{\mathrm{AP}} \to Q`` is a transition function, and ``F \subseteq Q`` is a set of accepting states. The DFA accepts a word ``\sigma = \sigma_0 \sigma_1 \ldots \sigma_n`` over the alphabet ``2^{\mathrm{AP}}`` if there exists a sequence of states ``q_0, q_1, \ldots q_n`` such that ``q_{i+1} = \delta(q_i, \sigma_i)`` for all ``0 \leq i < n`` and ``q_n \in F``. We write ``\mathcal{A} \models \sigma`` if the word ``\sigma`` is accepted by the DFA ``\mathcal{A}``. + +A DFA can be constructed like in the following example[^1]: +```@example +using IntervalMDP # hide + +atomic_props = ["a", "b"] + +delta = TransitionFunction([ # Columns: states, rows: input symbols + 1 3 3 # symbol: "" + 2 1 3 # symbol: "a" + 3 3 3 # symbol: "b" + 1 1 1 # symbol: "ab" +]) + +initial_state = 1 + +dfa = DFA(delta, initial_state, atomic_props) +``` +Notice that the DFA does not include the set of accepting states. This is because the accepting states do not impact the Bellman operator and are therefore defined in `DFAReachability` objects, as shown below.
+ +```@example +using IntervalMDP # hide + +accepting_states = [3] # Accepting _DFA_ states + +time_horizon = 10 +prop = FiniteTimeDFAReachability(accepting_states, time_horizon) + +convergence_threshold = 1e-8 +prop = InfiniteTimeDFAReachability(accepting_states, convergence_threshold) +``` + +Given an fRMDP ``M = (S, S_0, A, \mathcal{G}, \Gamma)`` and a labeling function ``L : S \to \Sigma`` that maps states of the fRMDP to symbols in the alphabet of the DFA, a path ``\omega = s_0 s_1 \ldots`` in the fRMDP produces a word ``L(s_0) L(s_1) \ldots`` over the alphabet of the DFA. The probability of producing a path in the fRMDP that is accepted by the DFA can be expressed via the product construction ``M \otimes \mathcal{A} = (Z, Z_0, A, \Gamma')``, where +- ``Z = S \times Q`` is the set of product states, +- ``Z_0 = S_0 \times \{q_0\}`` is the set of initial product states, +- ``A`` is the set of actions, and +- ``\Gamma' = \{\Gamma'_{z, a}\}_{z \in Z, a \in A}`` is the joint ambiguity set defined as +```math +\Gamma'_{z, a} = \{\gamma'_{z, a} \in \mathcal{D}(Z) : \exists \gamma_{s, a} \in \Gamma_{s, a} \text{ s.t. } \gamma'_{z, a}(z') = \mathbf{1}_{q'}(\delta(q, L(s'))) \gamma_{s, a}(s')\} +``` +where ``z = (s, q)`` and ``z' = (s', q')``. Then, the probability of generating a path, of length ``K \in \mathbb{N}``, in the fRMDP that is accepted by the DFA is formally defined as +```math +\mathbb{P}^{\pi, \eta}_{\mathrm{dfa-reach}}(F, K) = \mathbb{P}^{\pi, \eta}_{M \otimes \mathcal{A}} \left[\omega \in \Omega : \omega[K] \in S \times F \right]. +``` +Note that this is equivalent to reachability in the product fRMDP ``M \otimes \mathcal{A}``. Therefore, the property can equivalently be stated via the value function for reachability in the product fRMDP.
+```math + \begin{aligned} + V^{\pi, \eta}_0(z) &= \mathbf{1}_{S \times F}(z)\\ + V^{\pi, \eta}_k(z) &= \mathbf{1}_{S \times F}(z) + \mathbf{1}_{Z \setminus (S \times F)}(z) \mathbb{E}_{z' \sim \eta(z, a, K - k)}[V^{\pi, \eta}_{k - 1}(z')] + \end{aligned} +``` +such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{dfa-reach}}(F, K) = V_K(z)``. + +Note that the product is never explicitly constructed, for three reasons: (i) the result is an RMDP and not an fRMDP, thus negating the computational benefits of using fRMDPs, (ii) the transition function will be sparse even if some marginals in the original fRMDP are dense, and (iii) the Bellman operator will not be able to leverage the structure of the product construction. Instead, we lazily construct the product as a [`ProductProcess`](@ref), and sequentially update the value function first updating wrt. the DFA transition and then wrt. the fRMDP transition like +```math + \begin{aligned} + V^{\pi, \eta}_0(s, q) &= \mathbf{1}_{F}(q)\\ + W^{\pi, \eta}_k(s', q) &= \mathbf{1}_{F}(q) + \mathbf{1}_{Q \setminus F}(q) V^{\pi, \eta}_{k - 1}(s', \delta(q, L(s')))\\ + V^{\pi, \eta}_k(s, q) &= \mathbb{E}_{s' \sim \eta(s, a, K - k)}[W^{\pi, \eta}_k(s', q)] + \end{aligned} +``` +Notice that ``W^{\pi, \eta}_k(s', q)`` is shared for all ``s \in S`` when updating ``V^{\pi, \eta}_k(s, q)``. This allows for efficient, cache-friendly implementations of the Bellman operator. The kernel for product processes merely forwards, for each DFA state ``q \in Q \setminus F``, the Bellman update to the underlying Bellman operator algorithm, which is chosen based on the fRMDP model type, e.g. IMDP or odIMDP, storage type, e.g. dense or sparse, and hardware, e.g. CPU or CUDA, for efficiency.
+ +Example of constructing a product process: +```@setup product_process_example +using IntervalMDP + +# Construct DFA +atomic_props = ["a", "b"] + +delta = TransitionFunction([ # Columns: states, rows: input symbols + 1 3 3 # symbol: "" + 2 1 3 # symbol: "a" + 3 3 3 # symbol: "b" + 1 1 1 # symbol: "ab" +]) + +initial_state = 1 + +dfa = DFA(delta, initial_state, atomic_props) + +# Construct fRMDP +prob1 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.5 + 0.1 0.3 + 0.2 0.1 + ], + upper = [ + 0.5 0.7 + 0.6 0.5 + 0.7 0.3 + ], +) + +prob2 = IntervalAmbiguitySets(; + lower = [ + 0.1 0.2 + 0.2 0.3 + 0.3 0.4 + ], + upper = [ + 0.6 0.6 + 0.5 0.5 + 0.4 0.4 + ], +) + +transition_probs = [prob1, prob2] +istates = [Int32(1)] + +mdp = IntervalMarkovDecisionProcess(transition_probs, istates) +``` + +```@example product_process_example +map = [1, 2, 3] # "", "a", "b" +lf = LabellingFunction(map) + +product_process = ProductProcess(mdp, dfa, lf) +``` +The product process can then be used in a [`VerificationProblem`](@ref) or [`ControlSynthesisProblem`](@ref) together with a specification with a DFA property. + +[^1]: The automatic construction of a DFA from scLTL or LTLf formulae is not currently supported, but planned for future releases. 
\ No newline at end of file diff --git a/src/models/DFA.jl b/src/models/DFA.jl index 0d86a6b5..c98b2f7d 100644 --- a/src/models/DFA.jl +++ b/src/models/DFA.jl @@ -33,6 +33,16 @@ struct DFA{T <: TransitionFunction, DA <: AbstractDict{String, Int32}} <: transition::T # delta : |Q| x |2^{AP}| => |Q| initial_state::Int32 # q_0 labelmap::DA + + function DFA( + transition::T, + initial_state::Int32, + labelmap::DA, + ) where {T <: TransitionFunction, DA <: AbstractDict{String, Int32}} + checkdfa(transition, initial_state, labelmap) + + return new{T, DA}(transition, initial_state, labelmap) + end end function DFA( @@ -41,11 +51,18 @@ function DFA( atomic_propositions::AbstractVector{String}, ) labelmap = atomicpropositions2labels(atomic_propositions) - checkdfa(transition, initial_state, labelmap) return DFA(transition, initial_state, labelmap) end +function DFA( + transition::TransitionFunction, + initial_state::Integer, + atomic_propositions::AbstractVector{String}, +) + return DFA(transition, Int32.(initial_state), atomic_propositions) +end + """ Given vector of atomic_propositions ``AP``, compute power set ``2^{AP}`` Returns the alphabet (powerset) and corresponding index as Dictionary @@ -132,13 +149,6 @@ Return the initial state of the Deterministic Finite Automaton. """ initial_state(dfa::DFA) = dfa.initial_state -""" - accepting_states(dfa::DFA) - -Return the accepting states of the Deterministic Finite Automaton. -""" -accepting_states(dfa::DFA) = dfa.accepting_states - """ getindex(dfa::DFA, q, w) @@ -156,15 +166,12 @@ Base.getindex(dfa::DFA, q, w::String) = dfa[q, dfa.labelmap[w]] Base.iterate(dfa::DFA, state::Int32 = one(Int32)) = state > num_states(dfa) ? nothing : (state, state + one(Int32)) +Base.show(io::IO, dfa::DFA) = showsystem(io, "", "", dfa) + function showsystem(io::IO, first_prefix, prefix, dfa::DFA) # TODO: Print diagram? 
- println(io, first_prefix, styled"{code:DFA}") + println(io, first_prefix, styled"{code:DFA} (Deterministic Finite Automaton)") println(io, prefix, styled"├─ Number of states: {magenta:$(num_states(dfa))}") println(io, prefix, styled"├─ Number of labels: {magenta:$(num_labels(dfa))}") - println(io, prefix, styled"├─ Initial state: {magenta:$(initial_state(dfa))}") - println( - io, - prefix, - styled"└─ Accepting states: {magenta:$(accepting_states(dfa))}", - ) + println(io, prefix, styled"└─ Initial state: {magenta:$(initial_state(dfa))}") end \ No newline at end of file diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index 23fd2fc8..16b8f0b7 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -99,11 +99,13 @@ source_shape(proc::ProductProcess) = (source_shape(markov_process(proc))..., num action_variables(proc::ProductProcess) = action_variables(markov_process(proc)) action_shape(proc::ProductProcess) = action_shape(markov_process(proc)) -function showsystem(io::IO, prefix, mdp::ProductProcess{M, D, L}) where {M, D, L} - println(io, prefix, styled"{code:ProductProcess}") +Base.show(io::IO, proc::ProductProcess) = showsystem(io, "", "", proc) + +function showsystem(io::IO, first_prefix, prefix, mdp::ProductProcess{M, D, L}) where {M, D, L} + println(io, first_prefix, styled"{code:ProductProcess}") println(io, prefix, "├─ Underlying process:") - showsystem(io, prefix * "├─ ", prefix * "│ ", markov_process(mdp)) + showsystem(io, prefix * "│ ", prefix * "│ ", markov_process(mdp)) println(io, prefix, "├─ Automaton:") - showsystem(io, prefix * "│ ", automaton(mdp)) - println(io, prefix, styled"└─ Labelling type: {magenta:$(L)}") + showsystem(io, prefix * "│ ", prefix * "│ ", automaton(mdp)) + println(io, prefix, styled"└─ Labelling type: {magenta:$(L)}") # TODO: Improve printing of labelling function end \ No newline at end of file diff --git a/src/probabilities/Labelling.jl b/src/probabilities/Labelling.jl index 
e04d4a1b..deef2acc 100644 --- a/src/probabilities/Labelling.jl +++ b/src/probabilities/Labelling.jl @@ -3,7 +3,7 @@ abstract type AbstractLabelling end """ struct LabellingFunction{ T <: Integer, - VT <: AbstractVector{T} + AT <: AbstractArray{T} } A type representing the labelling of IMDP states into DFA inputs. @@ -15,9 +15,8 @@ Formally, let ``L : S \\to 2^{AP}`` be a labelling function, where Then the ```LabellingFunction``` type is defined as vector which stores the mapping. ### Fields -- `map::VT`: mapping function where indices are (factored) IMDP states and stored values are DFA inputs. +- `map::AT`: mapping function where indices are (factored) IMDP states and stored values are DFA inputs. - `num_inputs::Int32`: number of IMDP states accounted for in mapping. -- `num_outputs::Int32`: number of DFA inputs accounted for in mapping. """ struct LabellingFunction{T <: Integer, AT <: AbstractArray{T}} <: AbstractLabelling diff --git a/src/probabilities/TransitionFunction.jl b/src/probabilities/TransitionFunction.jl index 526602aa..715146e1 100644 --- a/src/probabilities/TransitionFunction.jl +++ b/src/probabilities/TransitionFunction.jl @@ -1,6 +1,6 @@ """ struct TransitionFunction{ - T <: Unsigned, + T <: Integer, MT <: AbstractMatrix{T} } diff --git a/src/specification.jl b/src/specification.jl index ab12ac16..9a5f0746 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -1242,6 +1242,8 @@ strategy_mode(spec::Specification) = spec.strategy ismaximize(spec::Specification) = ismaximize(strategy_mode(spec)) isminimize(spec::Specification) = isminimize(strategy_mode(spec)) +Base.show(io::IO, spec::Specification) = showspecification(io, "", "", spec) + function showspecification(io::IO, first_prefix, prefix, spec::Specification) println(io, first_prefix, styled"{code:Specification}") println(io, prefix, styled"├─ Satisfaction mode: {magenta:$(satisfaction_mode(spec))}") From e9b6237c71f099f6163b3e1cadfc293c64acf9e8 Mon Sep 17 00:00:00 2001 From: Frederik 
Baymler Mathiesen Date: Mon, 22 Sep 2025 22:31:03 +0200 Subject: [PATCH 35/71] Reduce references heading to not show up in list --- docs/src/models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/models.md b/docs/src/models.md index 823f3160..730e3184 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -226,7 +226,7 @@ Formally, an fIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \math The example in [Factored RMDPs](@ref) is also an example of an fIMDP. -## References +### References ```@bibliography Pages = ["models.md"] Canonical = false From 365421e9fc3eba061b88482dce69c4b16c452bff Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 22 Sep 2025 22:38:08 +0200 Subject: [PATCH 36/71] Fix citation indentation issues --- docs/make.jl | 2 +- docs/src/assets/citations.css | 17 +++++++++++++++++ docs/src/references.md | 5 +---- 3 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 docs/src/assets/citations.css diff --git a/docs/make.jl b/docs/make.jl index 6d19c183..6de816b5 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -17,7 +17,7 @@ makedocs(; prettyurls = get(ENV, "CI", "false") == "true", canonical = "https://www.baymler.com/IntervalMDP.jl", edit_link = "main", - assets = String[], + assets = String["assets/citations.css"], ), pages = [ "Home" => "index.md", diff --git a/docs/src/assets/citations.css b/docs/src/assets/citations.css new file mode 100644 index 00000000..b0c63267 --- /dev/null +++ b/docs/src/assets/citations.css @@ -0,0 +1,17 @@ +.citation dl { + display: grid; + grid-template-columns: max-content auto; } +.citation dt { + grid-column-start: 1; } +.citation dd { + grid-column-start: 2; + margin-bottom: 0.75em; } +.citation ul { + padding: 0 0 2.25em 0; + margin: 0; + list-style: none !important;} +.citation ul li { + text-indent: -2.25em; + margin: 0.33em 0.5em 0.5em 2.25em;} +.citation ol li { + padding-left:0.75em;} diff --git a/docs/src/references.md 
b/docs/src/references.md index 6e968ea4..df106f26 100644 --- a/docs/src/references.md +++ b/docs/src/references.md @@ -1,6 +1,3 @@ # Bibliography ```@bibliography -``` - -!!! todo - Fix citation newline and indentation issues. \ No newline at end of file +``` \ No newline at end of file From b8ef39ee3f30b00d926033ac832f924c0d618de2 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 25 Sep 2025 17:28:06 +0200 Subject: [PATCH 37/71] Document bellman algorithms --- docs/Project.toml | 2 + docs/make.jl | 1 + docs/src/algorithms.md | 311 +++++++++++++++++++++++++------------ docs/src/developer.md | 82 ++++++++++ docs/src/models.md | 19 ++- docs/src/specifications.md | 24 +-- src/algorithms.jl | 2 +- src/workspace.jl | 2 +- 8 files changed, 325 insertions(+), 118 deletions(-) create mode 100644 docs/src/developer.md diff --git a/docs/Project.toml b/docs/Project.toml index ec0de06c..5e888b24 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,8 @@ [deps] +Clarabel = "61c947e1-3e6d-4ee4-985a-eec8c727bd6e" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" IntervalMDP = "051c988a-e73c-45a4-90ec-875cac0402c7" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/docs/make.jl b/docs/make.jl index 6de816b5..bef6d9b9 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -33,6 +33,7 @@ makedocs(; "Index" => "api.md", ], "Data formats" => "data.md", + "Developer docs" => "developer.md", "References" => "references.md", ], doctest = false, diff --git a/docs/src/algorithms.md b/docs/src/algorithms.md index 50512a89..1218c5f5 100644 --- a/docs/src/algorithms.md +++ b/docs/src/algorithms.md @@ -1,154 +1,267 @@ # Algorithms -!!! todo - Write about floating point precision and rational types. 
+## Model checking +The core algorithmic component of this package is (robust) value iteration, which is used to solve verification and control synthesis problems for fRMDPs. Value iteration is an iterative algorithm that computes the value function for a given specification by repeatedly applying the [Bellman operator](@ref "Bellman operator algorithms") until convergence. To simplify the dicussion on the algorithmic choices, we will assume that the goal is to compute the maximizing pessimistic probability of reaching a set of states ``G``, that is, - ```math -\max_{\pi} \; \min_{\eta} \; \mathbb{P}_{\pi,\eta }\left[\omega \in \Omega : \exists k \in [0,K], \, \omega(k)\in G \right]. +\max_{\pi} \; \min_{\eta} \; \mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K). ``` -See [Models](@ref) for more details on the formal definition of fRMDPs, strategies, and adversaries; in this case the maximization and minimization operators respectively. The algorithms are easily adapted to other specifications, such as minimizing optimistic probability, which is useful for safety, or maximizing pessimitic discounted reward. Assume furthermore that the transition probabilities are represented as a sparse matrix. -This is the most common representation for large models, and the algorithms are easily adapted to dense matrices with the sorting (see [Sorting](@ref)) being shared across states such that parallelizing this has a smaller impact on performance. +See [Models](@ref) for more details on the formal definition of fRMDPs, strategies, and adversaries; in this case the maximization and minimization operators respectively. The algorithms are easily adapted to [other specifications](@ref "Specifications"). -## Solving reachability as value iteration -Computing the solution to the above problem can be reframed in terms of value iteration. The value function ``V_k`` is the probability of reaching ``G`` in ``k`` steps or fewer. 
The value function is initialized to ``V_0(s) = 1`` if ``s \in G`` and ``V_0(s) = 0`` otherwise. The value function is then iteratively updated according to the Bellman equation +Computing the solution to the above problem can be framed in terms of value iteration. The value function ``V_k`` is the probability of reaching ``G`` in ``k`` steps or fewer. The value function is initialized to ``V_0(s) = 1`` if ``s \in G`` and ``V_0(s) = 0`` otherwise. The value function is then iteratively updated according to the Bellman equation ```math \begin{aligned} V_{0}(s) &= \mathbf{1}_{G}(s) \\ - V_{k}(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S\setminus G}(s) \max_{a \in A} \min_{p_{s,a}\in \Gamma_{s,a}} \sum_{s' \in S} V_{k-1}(s') p_{s,a}(s'), + V_{k}(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \max_{a \in A} \min_{\gamma_{s,a} \in \Gamma_{s,a}} \sum_{t \in S} V_{k-1}(t) \gamma_{s,a}(t), \end{aligned} ``` where ``\mathbf{1}_{G}(s) = 1`` if ``s \in G`` and ``0`` otherwise is the indicator function for set ``G``. This Bellman update is repeated until ``k = K``, or if ``K = \infty``, until the value function converges, i.e. ``V_k = V_{k-1}`` for some ``k``. The value function is then the solution to the problem. -Exact convergence is virtually impossible to achieve in a finite number of iterations due to the finite precision of floating point numbers. Hence, we instead use a residual tolerance ``\epsilon`` and stop when Bellman residual ``V_k - V_{k-1}`` is less than the threshold, ``\|V_k - V_{k-1}\|_\infty < \epsilon``. - -In a more programmatic formulation, the algorithm (for ``K = \infty``) can be summarized as follows: +In a more programmatic formulation, the algorithm can be summarized as follows: ```julia function value_iteration(system, spec) - V = initialize_value_function(spec) + V = initialize_value_function(spec) # E.g. 
V[s] = 1 if s in G else 0 for reachability - while !converged(V) - V = bellman_update(V, system) + while !converged(V) # or for k in 1:K if K is finite + # Compute max_{a \in A} \min_{γ_{s,a} \in Γ_{s,a}} \sum_{t \in S} V_{k-1}(t) γ_{s,a}(t) for all states s + V = bellman_update(V, system) # System contains information about S, A, and Γ + post_process!(V, spec) # E.g. set V[s] = 1 for s in G for reachability end end ``` +We slightly abuse terminology and call the max/min expectation the Bellman update, even though it is not a proper Bellman operator as it does not include the indicator function for ``G``. The min/max expectation is however shared between all specifications, and thus it is natural to separate it from the specification-dependent post-processing step. -## Efficient value iteration +Note that exact convergence is virtually, impossible, unless using (computationally slow) exact arithmetic, to achieve in a finite number of iterations due to the finite precision of floating point numbers. Hence, we instead use a residual tolerance ``\epsilon`` and stop when Bellman residual ``V_k - V_{k-1}`` is less than the threshold, ``\|V_k - V_{k-1}\|_\infty < \epsilon``. See [Bellman operator algorithms](@ref) for algorithms that support exact arithmetic. -Computing the Bellman update for can be done indepently for each state. -```julia -function bellman_update(V, system) - # Thread.@threads parallelize across available threads - Thread.@threads for s in states(system) - # Minimize over probability distributions in `Gamma_{s,a}`, i.e. pessimistic - V_state = minimize_feasible_dist(V, system, s) - - # Maximize over actions - V[s] = maximum(V_state) - end -end -``` +## Bellman operator algorithms +As the Bellman update is the most computationally intensive part of the algorithm, it is crucial to implement it efficiently including considerations about type stability, pre-allocation and in-place operations, memory access patterns, and parallelization. 
-For each state, we need to compute the minimum over all feasible distributions per state-action pairs and the maximum over all actions for each state. -The minimum over all feasible distributions can be computed as a solution to a Linear Programming (LP) problem, namely +1. Type stability: the Bellman update should be type stable, i.e. the correct kernel to dispatch to should be inferable at compile time, to avoid dynamic dispatch and heap allocations in the hot loop. This can be achieved by using parametric types and avoiding abstract types in the hot loop. +2. Pre-allocation and in-place operations: to avoid unnecessary allocations and reducing GC pressure, the value function (pre and post Bellman update) is be pre-allocated and updated in-place, and the Bellman update relies on pre-allocated workspace objects. +3. Memory access patterns: to ensure cache efficiency, the memory access pattern should be as contiguous as possible. This is achieved by storing the transition matrices/ambiguity sets in column-major order, where each column corresponds to a source-action pair. +4. Parallelization: to leverage multi-core CPUs and CUDA hardware, the Bellman update should be parallelized across source-states and in the case of CUDA, also across actions and target states. +A challenge with designing Bellman operator algorithms for fRMDPs is that ``\min_{\gamma_{s,a} \in \Gamma_{s,a}} \sum_{t \in S} V_{k-1}(t) \gamma_{s,a}(t)`` is not always computable exactly, and thus, we must resort to sound approximations. For IMDPs, the minimum can be computed exactly via [O-maximization](@ref). Below, we will describe different algorithms for computing the Bellman update, their trade-offs, and algorithmic choices for an efficient implementation. 
+ +### O-maximization +In case of an IMDP, the minimum over all feasible distributions can be computed as a solution to a Linear Programming (LP) problem, namely ```math \begin{aligned} - \min_{p_{s,a}} \quad & \sum_{s' \in S} V_{k-1}(s') \cdot p_{s,a}(s'), \\ - \quad & \underline{P}(s,a,s') \leq p_{s,a}(s') \leq \overline{P}(s,a,s') \quad \forall s' \in S, \\ - \quad & \sum_{s' \in S} p_{s,a}(s') = 1. \\ + \min_{\gamma_{s, a}} \quad & \sum_{t \in S} V_{k-1}(t) \cdot \gamma_{s, a}(t), \\ + \quad & \underline{\gamma}_{s, a}(t) \leq \gamma_{s, a}(t) \leq \overline{\gamma}_{s, a}(t) \quad \forall t \in S, \\ + \quad & \sum_{t \in S} \gamma_{s,a}(t) = 1. \end{aligned} ``` - -However, due to the particular structure of the LP problem, we can use a more efficient algorithm: O-maximization, or ordering-maximization [1]. +However, due to the particular structure of the LP problem, we can use a more efficient algorithm: O-maximization, or ordering-maximization [givan2000bounded, lahijanian2015formal](@cite). In the case of pessimistic probability, we want to assign the most possible probability mass to the destinations with the smallest value of ``V_{k-1}``, while obeying that the probability distribution is feasible, i.e. within the probability bounds and that it sums to 1. This is done by sorting the values of ``V_{k-1}`` and then assigning state with the smallest value its upper bound, then the second smallest, and so on until the remaining mass must be assigned to the lower bound of the remaining states for probability distribution is feasible. 
```julia -function minimize_feasible_dist(V, system, s) +function min_value(V, system, source, action) # Sort values of `V` in ascending order order = sortperm(V) # Initialize distribution to lower bounds - p = lower_bounds(system, s) - rem = 1 - sum(p) + p = lower_bounds(system, source, action) + budget = 1 - sum(p) # Assign upper bounds to states with smallest values # until remaining mass is zero for idx in order - gap = upper_bounds(system, s)[idx] - p[idx] - if rem <= gap - p[idx] += rem + gap = upper_bounds(system, source, action)[idx] - p[idx] + if budget <= gap + p[idx] += budget break else p[idx] += gap - rem -= gap + budget -= gap end end - return p + v = dot(V, p) + return v end ``` -We abstract this algorithm into the sorting phase and the O-maximization phase: -```julia -function minimize_feasible_dist(V, system, s) - # Sort values of `V` in ascending order - order = sortstates(V) - p = o_maximize(system, s, order) - return p -end +For fIMDPs, O-maximization can be applied recursively over the marginals as a sound under-approximation of the minimum [mathiesen2025scalable](@cite). Let ``S = S_1 \times \cdots \times S_n`` be the state space factored into ``n`` state variables, and let ``\Gamma_{s,a} = \Gamma^1_{s,a} \times \cdots \times \Gamma^n_{s,a}`` be the transition ambiguity sets factored into ``n`` marginals. Then, we can compute a bound on the minimum as +```math + \begin{aligned} + W_{s,a}^{k,n}(t^1, \ldots, t^n) &= V_{k - 1}(t)\\ + W_{s,a}^{k,i-1}(t^1, \ldots, t^{i-1}) &= \min_{\gamma^i_{s,a} \in \Gamma^i_{s,a}} \sum_{t^i \in S_i} W_{s,a}^{k,i}( + t^1, \ldots, t^i) \gamma^i_{s,a}(t^i),\\ + &\qquad \qquad \text{ for } i = 2, \ldots, n \\ + W_{s,a}^{k} := W_{s,a}^{k,0} &= \min_{\gamma^1_{s,a} \in \Gamma^1_{s,a}} \sum_{t^1 \in S_1} W_{s,a}^{k,1}(t^1) \gamma^1_{s,a}(t^1). + \end{aligned} ``` - -When computing computing the above on a GPU, we can and should parallelize both the sorting and the O-maximization phase. 
-In the following two sections, we will discuss how parallelize these phases. - -### Sorting -Sorting in parallel on the GPU is a well-studied problem, and there are many algorithms for doing so. We choose to use bitonic sorting, which is a sorting network that is easily parallelized and implementable on a GPU. The idea is to merge bitonic subsets, i.e. sets with first increasing then decreasing subsets of equal size, of increasingly larger sizes and perform minor rounds of swaps to maintain the bitonic property. The figure below shows 3 major rounds to sort a set of 8 elements (each line represents an element, each arrow is a comparison pointing towards the larger element). The latency[^1] of the sorting network is ``O((\lg n)^2)``, and thus it scales well to larger number of elements. See [Wikipedia](https://en.wikipedia.org/wiki/Bitonic_sorter) for more details. - -![](assets/bitonic_sorting.svg) - - -### O-maximization -In order to parallelize the O-maximization phase, observe that O-maximization implicity implements a cumulative sum according to the ordering over gaps and this is the only dependency between the states. Hence, if we can parallelize this cumulative sum, then we can parallelize the O-maximization phase. -Luckily, there is a well-studied algorithm for computing the cumulative sum in parallel: tree reduction for prefix scan. The idea is best explained with figure below. - -![](assets/tree_reduction_prefix_scan.svg) - -Here, we recursively compute the cumulative sum of larger and larger subsets of the array. The latency is ``O(\lg n)``, and thus very efficient. See [Wikipedia](https://en.wikipedia.org/wiki/Prefix_sum) for more details. When implementing the tree reduction on GPU, it is possible to use warp shuffles to very efficiently perform tree reductions of up to 32 elements. For larger sets, shared memory to store the intermediate results, which is much faster than global memory. 
See [CUDA Programming Model](@ref) for more details on why these choices are important. - -Putting it all together, we get the following (pseudo-code) algorithm for O-maximization: -```julia -function o_maximize(system, s, order) - p = lower_bounds(system, s) - rem = 1 - sum(p) - gap = upper_bounds(system, s) - p - - # Ordered cumulative sum of gaps - cumgap = cumulative_sum(gap[order]) - - @parallelize for (i, o) in enumerate(order) - rem_state = max(rem - cumgap[i] + gap[o], 0) - if gap[o] < rem_state - p[o] += gap[o] - else - p[o] += rem_state - break - end - end - - return p -end +Then, ``V_k(s) := \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \max_{a \in A} W_{s,a}^{k}``. Note that this is strictly better than building a joint ambiguity set by multiplying the marginal interval bounds [mathiesen2025scalable](@cite). + +The algorithm is the default Bellman algorithm for IMDPs, but not for fIMDPs. To explicitly select (recursive) O-maximization, do the following: +```@setup explicit_omax +using IntervalMDP +N = Float64 + +prob1 = IntervalAmbiguitySets(; + lower = N[ + 0 1//2 + 1//10 3//10 + 1//5 1//10 + ], + upper = N[ + 1//2 7//10 + 3//5 1//2 + 7//10 3//10 + ], +) + +prob2 = IntervalAmbiguitySets(; + lower = N[ + 1//10 1//5 + 1//5 3//10 + 3//10 2//5 + ], + upper = N[ + 3//5 3//5 + 1//2 1//2 + 2//5 2//5 + ], +) + +prob3 = IntervalAmbiguitySets(; + lower = N[ + 0 0 + 0 0 + 1 1 + ], + upper = N[ + 0 0 + 0 0 + 1 1 + ] +) + +transition_probs = [prob1, prob2, prob3] + +mdp = IntervalMarkovDecisionProcess(transition_probs) +prop = FiniteTimeReachability([3], 10) # Reach state 3 within 10 timesteps +spec = Specification(prop, Pessimistic, Maximize) +problem = VerificationProblem(mdp, spec) ``` +```@example explicit_omax +alg = RobustValueIteration(OMaximization()) +result = solve(problem, alg) +nothing # hide +``` +O-maximization supports both floating point and exact arithmetic, and it is implemented for both CPU and CUDA hardware. 
+ +### Vertex enumeration +A way to compute the minimum exactly for fIMDPs, and in general polytopic ambiguity sets, is via vertex enumeration [schnitzer2025efficient](@cite). The idea is to enumerate the Cartesian product of all vertices of each polytope and then compute the minimum over the vertices. This is however only feasible for few state values along each marginal, as the potential number of vertices for each marginal can grow with the factorial of the number of state values, and exponentially in the number of dimensions. Hence, this algorithm is only feasible for small problems, but it is included for completeness and as a reference implementation. To use vertex enumeration, do the following: +```@setup explicit_vertex +using IntervalMDP +N = Float64 + +state_vars = (2, 3) +action_vars = (1, 2) + +marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ] +), (1, 2), (1,), (2, 3), (1,)) + +marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ] +), (2,), (2,), (3,), (2,)) + +mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + +prop = FiniteTimeReachability([(2, 3)], 10) # Reach state (2, 3) within 10 timesteps +spec = Specification(prop, Pessimistic, Maximize) +problem = VerificationProblem(mdp, spec) +``` +```@example explicit_vertex +alg = RobustValueIteration(VertexEnumeration()) +result = solve(problem, alg) +nothing # hide +``` +The implementation iterates vertex combinations in a lazy manner, and thus, it does not store all vertices in memory. 
Furthermore, efficient generation of vertices for each marginal is done via backtracking to avoid enumerating all possible orderings. -## CUDA Programming Model -We here give a brief introduction to the CUDA programming model to understand to algorithmic choices. For a more in-depth introduction, see the [CUDA C++ Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html). The CUDA framework is Single-Instruction Multiple-Thread (SIMT) parallel execution platform and Application Programming Interface. This is in contrast to Single-Instruction Multiple-Data where all data must be processed homogeneously without control flow. SIMT makes CUDA more flexible for heterogeneous processing and control flow. The smallest execution unit in CUDA is a thread, which is a sequential processing of instructions. A thread is uniquely identified by its thread index, which allows indexing into the global data for parallel processing. A group of 32 threads[^2] is called a warp, which will be executed _mostly_ synchronously on a streaming multiprocessor. If control flow makes threads in a wrap diverge, instructions may need to be decoded twice and executed in two separate cycles. Due to this synchronous behavior, data can be shared in registers between threads in a warp for maximum performance. A collection of (up to) 1024 threads is called a block, and this is the largest aggregation that can be synchronized. Furthermore, threads in a block share the appropriately named shared memory. This is memory that is stored locally on the streaming multiprocessor for fast access. Note that shared memory is unintuitively faster than local memory (not to be confused with registers) due to local memory being allocated in device memory. Finally, a collection of (up to) 65536 blocks is called the grid of a kernel, which is the set of instructions to be executed. The grid is singular as only a single ever exists per launched kernel. 
Hence, if more blocks are necessary to process the amount of data, then a grid-strided loop or multiple kernels are necessary. - -![](assets/cuda_programming_model.svg) +Vertex enumeration supports both floating point and exact arithmetic. +### Recursive McCormick envelopes +Another method for computing a sound under-approximation of the minimum for fIMDPs is via recursive McCormick envelopes [schnitzer2025efficient](@cite). The idea is to relace each bilinear term ``\gamma^1_{s, a}(t^1) \cdot \gamma^2_{s, a}(t^2)`` in ``\sum_{t \in S} V_{k-1}(') \gamma^1_{s, a}(t^1) \cdot \gamma^2_{s, a}(t^2)`` (for a system with two marginals) with a new variable ``q_{s, a}(t^1, t^2)`` and add linear McCormick constraints to ensure that ``q_{s, a}(t^1, t^2)`` is an over-approximation of the bilinear term. That is, +```math + \begin{aligned} + q_{s, a}(t^1, t^2) &\geq \underline{\gamma}^1_{s,a}(t^1) \cdot \gamma^2_{s,a}(t^2) + \underline{\gamma}^2_{s,a}(t^2) \cdot \gamma^1_{s,a}(t^1) - \underline{\gamma}^1_{s,a}(t^1) \cdot \underline{\gamma}^2_{s,a}(t^2), \\ + q_{s, a}(t^1, t^2) &\geq \overline{\gamma}^1_{s,a}(t^1) \cdot \gamma^2_{s,a}(t^2) + \overline{\gamma}^2_{s,a}(t^2) \cdot \gamma^1_{s,a}(t^1) - \overline{\gamma}^1_{s,a}(t^1) \cdot \overline{\gamma}^2_{s,a}(t^2), \\ + q_{s, a}(t^1, t^2) &\leq \underline{\gamma}^1_{s,a}(t^1) \cdot \gamma^2_{s,a}(t^2) + \overline{\gamma}^2_{s,a}(t^2) \cdot \gamma^1_{s,a}(t^1) - \underline{\gamma}^1_{s,a}(t^1) \cdot \overline{\gamma}^2_{s,a}(t^2), \\ + q_{s, a}(t^1, t^2) &\leq \overline{\gamma}^1_{s,a}(t^1) \cdot \gamma^2_{s,a}(t^2) + \underline{\gamma}^2_{s,a}(t^2) \cdot \gamma^1_{s,a}(t^1) - \overline{\gamma}^1_{s,a}(t^1) \cdot \underline{\gamma}^2_{s,a}(t^2). + \end{aligned} +``` +In addition, we add the constraint that ``\sum_{t^1 \in S_1} \sum_{t^2 \in S_2} q_{s, a}(t^1, t^2) = 1`` such that ``q_{s, a}`` is a valid probability distribution. + +This results in a Linear Programming (LP) problem that can be solved efficiently. 
The McCormick envelopes can be applied recursively for more than two marginals. The algorithm is more efficient than vertex enumeration and is thus the default Bellman algorithm for fIMDPs. + +To use recursive McCormick envelopes, do the following: +```@setup explicit_mccormick +using IntervalMDP, HiGHS +N = Float64 + +state_vars = (2, 3) +action_vars = (1, 2) + +marginal1 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ] +), (1, 2), (1,), (2, 3), (1,)) + +marginal2 = Marginal(IntervalAmbiguitySets(; + lower = N[ + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ] +), (2,), (2,), (3,), (2,)) + +mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) +prop = FiniteTimeReachability([(2, 3)], 10) # Reach state (2, 3) within 10 timesteps +spec = Specification(prop, Pessimistic, Maximize) +problem = VerificationProblem(mdp, spec) +``` +```@example explicit_mccormick +# Use default LP solver (HiGHS) +alg = RobustValueIteration(LPMcCormickRelaxation()) -[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. +# Choose a different LP solver +using Clarabel +alg = RobustValueIteration(LPMcCormickRelaxation(; lp_solver=Clarabel.Optimizer)) -[^1]: Note that when assessing parallel algorithms, the asymptotic performance is measured by the latency, which is the delay in the number of parallel operations, before the result is available. 
This is in contrast to traditional algorithms, which are assessed by the total number of operations. +result = solve(problem, alg) +nothing # hide +``` -[^2]: with consecutive thread indices aligned to a multiple of 32. \ No newline at end of file +See the [JuMP documentation](https://jump.dev/JuMP.jl/stable/installation/#Supported-solvers) for a list of supported LP solvers. The recursive McCormick envelopes Bellman operator algorithm supports primarily floating point, but also exact arithmetic if the chosen LP solver does. \ No newline at end of file diff --git a/docs/src/developer.md b/docs/src/developer.md new file mode 100644 index 00000000..59e7f217 --- /dev/null +++ b/docs/src/developer.md @@ -0,0 +1,82 @@ +# Developer documentation + +## Bellman algorithms +### [O-maximization](@id dev-docs-omax) +To optimize the procedure, we abstract the O-maximization algorithm into the sorting phase and the O-maximization phase: +```julia +function min_value(V, system, source, action) + # Sort values of `V` in ascending order + order = sortstates(V) + v = o_maximize(system, source, action, order) + return v +end +``` +Notice the the order is shared for all source-action pairs, and thus, we can pre-compute it once per Bellman update. We however only do so for dense transition ambiguity sets, as in the sparse case, it is often faster to sort repeatedly, but only for the support. I.e., +```julia +function sortstates(V, system, source, action) # I.e. sort per source-action pair + supp = support(system, source, action) + order = sortperm(@view(V[supp])) # Sort only for the support + return supp[order] # Return sorted indices in original indexing +end +``` + +#### GPU acceleration +The sorting and O-maximization phases can be parallelized on the GPU to leverage the massive parallelism. The following assumes that the reader is familiar with the CUDA programming model; see [CUDA Programming Model](@ref) for a brief introduction. 
The specific execution plan depends on the storage type and size of model; please refer to the source code for specifics. + +##### Sorting +Sorting in parallel on the GPU is a well-studied problem, and there are many algorithms for doing so. We choose to use bitonic sorting, which is a sorting network that is easily parallelized and implementable on a GPU. The idea is to merge bitonic subsets, i.e. sets with first increasing then decreasing subsets of equal size, of increasingly larger sizes and perform minor rounds of swaps to maintain the bitonic property. The figure below shows 3 major rounds to sort a set of 8 elements (each line represents an element, each arrow is a comparison pointing towards the larger element). The latency[^1] of the sorting network is ``O((\lg n)^2)``, and thus it scales well to larger number of elements. See [Wikipedia](https://en.wikipedia.org/wiki/Bitonic_sorter) for more details. + +![](assets/bitonic_sorting.svg) + + +##### O-maximization phase +In order to parallelize the O-maximization phase, observe that O-maximization implicity implements a cumulative sum according to the ordering over gaps and this is the only dependency between the states. Hence, if we can parallelize this cumulative sum, then we can parallelize the O-maximization phase. +Luckily, there is a well-studied algorithm for computing the cumulative sum in parallel: tree reduction for prefix scan. The idea is best explained with figure below. + +![](assets/tree_reduction_prefix_scan.svg) + +Here, we recursively compute the cumulative sum of larger and larger subsets of the array. The latency is ``O(\lg n)``, and thus very efficient. See [Wikipedia](https://en.wikipedia.org/wiki/Prefix_sum) for more details. 
Putting it all together, we get the following (pseudo-code) algorithm for O-maximization: +```julia +function o_maximize(system, source, action, order) + p = lower_bounds(system, source, action) + rem = 1 - sum(p) + gap = upper_bounds(system, source, action) - p + + # Ordered cumulative sum of gaps via tree reduction + cumgap = cumulative_sum(gap[order]) + + @parallelize for (i, o) in enumerate(order) + rem_state = max(rem - cumgap[i] + gap[o], 0) + if gap[o] < rem_state + p[o] += gap[o] + else + p[o] += rem_state + break + end + end + + return p +end +``` +When implementing the algorithm above in CUDA, it is possible to use warp shuffles to very efficiently perform tree reductions of up to 32 elements. For larger sets, shared memory to store the intermediate results, which is much faster than global memory. See [CUDA Programming Model](@ref) for more details on why these choices are important. + +### [Vertex enumeration](@id dev-docs-vertex-enumeration) +First, we concern ourselves with enumerating the vertices of a single marginal. The key observation for an efficient algorithm is that, while each vertex corresponds to a unique ordering of the states, many orderings yield the same vertex. Thus, we need an algorithm that generates each vertex exactly once without generating all orderings explicitly. To this end, we rely on a backtracking algorithm where state values are added to a list of a "maximizing" state values, and backtrack once a vertex is found, i.e. `sum(p) == 1` and the remaining state values are assigned a lower bound. + +For the product of marginals, we simply apply ``Iterators.product`` to get an iterator over all combinations of vertices. 
+ +### [Recursive McCormick envelopes](@id dev-docs-mccormick) +The recursive McCormick envelopes for polytoptic fRMDPs are described in [schnitzer2025efficient](@cite), with the addition that we add the marginal constraints to the linear program and the constraint that each relaxation `q` in the recursion is a valid probability distribution, i.e. `sum(q) == 1`. + +Another consideration is whether to recursively relax as a sequence of marginals or as a binary tree. In [schnitzer2025efficient](@cite), the recursive relaxation is done as a sequence. However, the tree structure requires significantly fewer auxiliary variables and thus both memory and time. A formal argument of the resulting minimum value between the two relaxation structures is missing, but empirically, they yield the same results. + + +## CUDA Programming Model +We here give a brief introduction to the CUDA programming model to understand to algorithmic choices. For a more in-depth introduction, see the [CUDA C++ Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html). The CUDA framework is Single-Instruction Multiple-Thread (SIMT) parallel execution platform and Application Programming Interface. This is in contrast to Single-Instruction Multiple-Data where all data must be processed homogeneously without control flow. SIMT makes CUDA more flexible for heterogeneous processing and control flow. The smallest execution unit in CUDA is a thread, which is a sequential processing of instructions. A thread is uniquely identified by its thread index, which allows indexing into the global data for parallel processing. A group of 32 threads[^2] is called a warp, which will be executed _mostly_ synchronously on a streaming multiprocessor. If control flow makes threads in a wrap diverge, instructions may need to be decoded twice and executed in two separate cycles. Due to this synchronous behavior, data can be shared in registers between threads in a warp for maximum performance. 
A collection of (up to) 1024 threads is called a block, and this is the largest aggregation that can be synchronized. Furthermore, threads in a block share the appropriately named shared memory. This is memory that is stored locally on the streaming multiprocessor for fast access. Note that shared memory is unintuitively faster than local memory (not to be confused with registers) due to local memory being allocated in device memory. Finally, a collection of (up to) 65536 blocks is called the grid of a kernel, which is the set of instructions to be executed. The grid is singular as only a single ever exists per launched kernel. Hence, if more blocks are necessary to process the amount of data, then a grid-strided loop or multiple kernels are necessary. + +![](assets/cuda_programming_model.svg) + + +[^1]: Note that when assessing parallel algorithms, the asymptotic performance is measured by the latency, which is the delay in the number of parallel operations, before the result is available. This is in contrast to traditional algorithms, which are assessed by the total number of operations. + +[^2]: with consecutive thread indices aligned to a multiple of 32. \ No newline at end of file diff --git a/docs/src/models.md b/docs/src/models.md index 730e3184..e74555c9 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -10,7 +10,7 @@ For ``n`` finite sets ``S_1, \ldots, S_n`` we denote by ``S_1 \times \cdots \tim ```math \Gamma = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(s) = \prod_{i=1}^n \gamma^i(s^i), \, \gamma^i \in \Gamma_i \right\} ``` -where ``s = (s_1, \ldots, s_n) \in S``. We will denote the product ambiguity set as ``\Gamma = \bigotimes_{i=1}^n \Gamma_i``. Each ``\Gamma_i`` is called a marginal or component ambiguity set. +where ``s = (s_1, \ldots, s_n) \in S``. We will denote the product ambiguity set as ``\Gamma = \bigotimes_{i=1}^n \Gamma_i``. Each ``\Gamma_i`` is called a marginal or component ambiguity set. 
A transition is a triplet ``(s, a, t) \in S \times A \times S`` where ``s`` is the source state, ``a`` is the action, and ``t`` is the target state. ## Factored RMDPs Factored Robust Markov Decision Processes (fRMDPs) [schnitzer2025efficient, delgado2011efficient](@cite) are an extension of Robust Markov Decision Processes (RMDPs) [nilim2005robust, wiesemann2013robust, suilen2024robust](@cite) that incorporate a factored representation of the state and action spaces, i.e. with state and action variables. This allows for a more compact representation of the transition model and flexibility in modeling complex systems. First, we define here fRMDPs, and then in the subsequent sections, we define various special subclasses of fRMDPs, including how they relate to each other and to fRMDPs. @@ -23,10 +23,10 @@ Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \mathcal{G}, \Gamma)``, whe - ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \mathcal{V}_{ind} \cup \mathcal{V}_{cond} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V}_{ind} \times \mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, - ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is a product of ambiguity sets ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` along each marginal ``i`` conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``, i.e. 
```math - \Gamma_{s,a} = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(s') = \prod_{i=1}^n \gamma^i(s'_i | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}), \, \gamma^i(\cdot | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}) \in \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i)} \right\}. + \Gamma_{s,a} = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(t) = \prod_{i=1}^n \gamma^i(t_i | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}), \, \gamma^i(\cdot | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}) \in \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i)} \right\}. ``` -For a given source-action pair ``(s, a) \in S \times A``, any distribution ``\gamma_{s, a} \in \Gamma_{s,a}`` is called a feasible distribution, and feasible transitions are triplets ``(s, a, s') \in S \times A \time S`` where ``s' \in \mathop{supp}(\gamma_{s, a})`` for any feasible distribution ``\gamma_{s, a} \in \Gamma_{s, a}``. A path of an fRMDP is a sequence of states and actions ``\omega = s[0], a[0], s[1], a[1], \dots`` where ``s[k] \in S`` and ``a[k] \in A`` for all ``k \in \mathbb{N}_0``, and ``(s[k], a[k], s[k + 1])`` is a feasible transition for all ``k \in \mathbb{N}_0``. We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. +For a given source-action pair ``(s, a) \in S \times A``, any distribution ``\gamma_{s, a} \in \Gamma_{s,a}`` is called a feasible distribution, and feasible transitions are triplets ``(s, a, t) \in S \times A \times S`` where ``t \in \mathop{supp}(\gamma_{s, a})`` for any feasible distribution ``\gamma_{s, a} \in \Gamma_{s, a}``. 
A path of an fRMDP is a sequence of states and actions ``\omega = s[0], a[0], s[1], a[1], \dots`` where ``s[k] \in S`` and ``a[k] \in A`` for all ``k \in \mathbb{N}_0``, and ``(s[k], a[k], s[k + 1])`` is a feasible transition for all ``k \in \mathbb{N}_0``. We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. A _strategy_ or _policy_ for an fRMDP is a function ``\pi : \Omega_{fin} \to A`` that assigns an action, given a (finite) path called the history. _Time-dependent_ Markov strategies are functions from state and time step to an action, i.e. ``\pi : S \times \mathbb{N}_0 \to A``. This can equivalently be described as a sequence of functions indexed by time ``\mathbf{\pi} = (\pi[0], \pi[1], \ldots)``. If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. The focus of this package is on dynamic uncertainties where the choice of the adversary is resolved at every time step, called dynamic uncertainty, and where the adversary has access to both the current state and action, called ``(s, a)``-rectangularity. We refer to [suilen2024robust](@cite) for further details on the distinction between static and dynamic uncertainties, types of rectangularity, and their implications. Given a strategy and an adversary, an fRMDP collapses to a finite (factored) Markov chain. 
@@ -42,7 +42,10 @@ action_indices = (1,)
 state_dims = (2, 3)
 action_dims = (1,)
 marginal1 = Marginal(IntervalAmbiguitySets(;
-    lower = [ # 6 ambiguity sets = 2 * 3 source states, 1 action
+    # 6 ambiguity sets = 2 * 3 source states, 1 action
+    # Column layout: (a¹₁, s¹₁, s²₁), (a¹₁, s¹₂, s²₁), (a¹₁, s¹₁, s²₂), (a¹₁, s¹₂, s²₂), (a¹₁, s¹₁, s²₃), (a¹₁, s¹₂, s²₃)
+    # Equivalent to CartesianIndices(actions_dims..., state_dims...), i.e. actions first, then states in lexicographic order
+    lower = [
         1/15 7/30 1/15 13/30 4/15 1/6
         2/5 7/30 1/30 11/30 2/15 1/10
     ],
@@ -57,7 +60,10 @@ action_indices = (2,)
 state_dims = (3,)
 action_dims = (2,)
 marginal2 = Marginal(IntervalAmbiguitySets(;
-    lower = [ # 6 ambiguity sets = 3 source states, 2 actions
+    # 6 ambiguity sets = 3 source states, 2 actions
+    # Column layout: (a²₁, s²₁), (a²₂, s²₁), (a²₁, s²₂), (a²₂, s²₂), (a²₁, s²₃), (a²₂, s²₃)
+    # Equivalent to CartesianIndices(actions_dims..., state_dims...), i.e. actions first, then states in lexicographic order
+    lower = [
         1/30 1/3 1/6 1/15 2/5 2/15
         4/15 1/4 1/6 1/30 2/15 1/30
         2/15 7/30 1/10 7/30 7/15 1/5
@@ -73,6 +79,9 @@ initial_states = [(1, 1)] # Initial states are optional
 mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2), initial_states)
 ```
 
+!!! warning
+    Notice that source-action pairs are on the columns of the matrices to define the interval bounds. This is counter to most literature on transition matrices where transitions are from row to column. The choice of layout is to ensure that the memory access pattern is cache-friendly, as each column is stored contiguously in memory (column-major) and the Bellman updates iterate outer-most over source-action pairs. However, it also has a fundamental mathematical justification: the transition matrix can be viewed as a linear operator and the matrix form of a linear operator is defined such that the columns correspond to the input dimensions, i.e. from column to row. 
Furthermore, actions for the same source state are stored contiguously, which is also important for cache efficiency. + ## IMCs Interval Markov Chains (IMCs) [delahaye2011decision](@cite) are a subclass of fRMDPs and a generalization of Markov Chains (MCs), where the transition probabilities are not known exactly, but they are constrained to be in some probability interval. Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 2f210dad..916e44b3 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -38,7 +38,7 @@ The property is equivalent to the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus G}(s) \mathbb{E}_{t \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(t)] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{reach}}(G, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. @@ -69,7 +69,7 @@ which is equivalent with the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbb{E}_{t \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(t)] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = V_K(s)`` for a horizon ``K \in \mathbb{N}``. 
@@ -97,7 +97,7 @@ The property is equivalent to the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus (G \cup O)}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbf{1}_{G}(s) + \mathbf{1}_{S \setminus (G \cup O)}(s) \mathbb{E}_{t \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(t)] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{reach-avoid}}(G, O, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. @@ -135,7 +135,7 @@ which is equivalent with the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{G}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s)\mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s)\mathbb{E}_{t \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(t)] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{exact-reach}}(G, K) = V_K(s)`` for a horizon ``K \in \mathbb{N}``. @@ -169,7 +169,7 @@ This property can by duality with reachability equivalently be states as ``\math ```math \begin{aligned} V^{\pi, \eta}_0(s) &= -\mathbf{1}_{O}(s)\\ - V^{\pi, \eta}_k(s) &= -\mathbf{1}_{O}(s) + \mathbf{1}_{S \setminus O}(s) \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + V^{\pi, \eta}_k(s) &= -\mathbf{1}_{O}(s) + \mathbf{1}_{S \setminus O}(s) \mathbb{E}_{t \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(t)] \end{aligned} ``` such that ``\mathbb{P}^{\pi, \eta}_{\mathrm{safe}}(G, K) = 1 + V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. 
@@ -204,7 +204,7 @@ The property is equivalent to the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= r(s)\\ - V^{\pi, \eta}_k(s) &= r(s) + \nu \mathbb{E}_{s' \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(s')] + V^{\pi, \eta}_k(s) &= r(s) + \nu \mathbb{E}_{t \sim \eta(s, a, K - k)}[V^{\pi, \eta}_{k - 1}(t)] \end{aligned} ``` such that ``\mathbb{E}^{\pi,\eta}_{\mathrm{reward}}(r, \nu, K) = V_K(s)``, where for ``K = \infty`` the adversary does not depend on time. @@ -242,7 +242,7 @@ The property is equivalent to the following value function ```math \begin{aligned} V^{\pi, \eta}_0(s) &= \mathbf{1}_{S \setminus 0}(s)\\ - V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s) \left(1 + \mathbb{E}_{s' \sim \eta(s, a)}[V^{\pi, \eta}_{k - 1}(s')]\right) + V^{\pi, \eta}_k(s) &= \mathbf{1}_{S \setminus O}(s) \left(1 + \mathbb{E}_{t \sim \eta(s, a)}[V^{\pi, \eta}_{k - 1}(t)]\right) \end{aligned} ``` such that ``\mathbb{E}^{\pi,\eta}_{\mathrm{exit}}(O) = V_\infty(s)``. The adversary does not depend on time. @@ -307,9 +307,9 @@ Given an fRMDP ``M = (S, S_0, A, \mathcal{G}, \Gamma)`` and a labeling function - ``A`` is the set of actions, and - ``\Gamma' = \{\Gamma_{z, a}}_{z \in Z, a \in A}`` is the joint ambiguity set defined as ```math -\Gamma'_{z, a} = \{\gamma'_{z, a} \in \mathcal{D}(Z) : \exists \gamma_{s, a} \in \Gamma_{s, a} \text{ s.t. } \gamma'_{z, a}(z') = \mathbf{1}_{q'}(\delta(q, L(s'))) \gamma_{s, a}(s')\} +\Gamma'_{z, a} = \{\gamma'_{z, a} \in \mathcal{D}(Z) : \exists \gamma_{s, a} \in \Gamma_{s, a} \text{ s.t. } \gamma'_{z, a}(z') = \mathbf{1}_{q'}(\delta(q, L(t))) \gamma_{s, a}(t)\} ``` -where ``z = (s, q)`` and ``z' = (s', q')``. Then, the probability of generating a path, of length ``K \in \mathbb{N}``, in the fRMDP that is accepted by the DFA is formally defined as +where ``z = (s, q)`` and ``z' = (t, q')``. 
Then, the probability of generating a path, of length ``K \in \mathbb{N}``, in the fRMDP that is accepted by the DFA is formally defined as
 ```math
 \mathbb{P}^{\pi, \eta}_{\mathrm{dfa-reach}}(F, K) = \mathbb{P}^{\pi, \eta}_{M \otimes \mathcal{A}} \left[\omega \in \Omega : \omega[K] \in S \times F \right].
 ```
@@ -326,11 +326,11 @@ Note that the product is never explicitly constructed, for three reasons: (i) th
 ```math
 \begin{aligned}
 V^{\pi, \eta}_0(s, q) &= \mathbf{1}_{F}(q)\\
-    W^{\pi, \eta}_k(s', q) &= \mathbf{1}_{F}(q) + \mathbf{1}_{Q \setminus F}(q) V^{\pi, \eta}_{k - 1}(s', \delta(q, L(s')))\\
-    V^{\pi, \eta}_k(s, q) &= \mathbb{E}_{s' \sim \eta(s, a, K - k)}[W^{\pi, \eta}_k(s', q)]
+    W^{\pi, \eta}_k(t, q) &= \mathbf{1}_{F}(q) + \mathbf{1}_{Q \setminus F}(q) V^{\pi, \eta}_{k - 1}(t, \delta(q, L(t)))\\
+    V^{\pi, \eta}_k(s, q) &= \mathbb{E}_{t \sim \eta(s, a, K - k)}[W^{\pi, \eta}_k(t, q)]
 \end{aligned}
 ```
-Notice that ``W^{\pi, \eta}_k(s', q)`` is shared for all ``s \in S`` when updating ``V^{\pi, \eta}_k(s, q)``. This allows for efficient, cache-friendly implementations of the Bellman operator. The kernel for product processes merely forwards, for each DFA state ``q \in Q \setminus F``, the Bellman update to the underlying Bellman operator algorithm, which is chosen based on the fRMDP model type, e.g. IMDP or odIMDP, storage type, e.g. dense or sparse, and hardware, e.g. CPU or CUDA, for efficicency.
+Notice that ``W^{\pi, \eta}_k(t, q)`` is shared for all ``s \in S`` when updating ``V^{\pi, \eta}_k(s, q)``. This allows for efficient, cache-friendly implementations of the Bellman operator. The kernel for product processes merely forwards, for each DFA state ``q \in Q \setminus F``, the Bellman update to the underlying Bellman operator algorithm, which is chosen based on the fRMDP model type, e.g. IMDP or odIMDP, storage type, e.g. dense or sparse, and hardware, e.g. CPU or CUDA, for efficiency. 
Example of constructing a product process: ```@setup product_process_example diff --git a/src/algorithms.jl b/src/algorithms.jl index aacaa922..19de7506 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -1,7 +1,7 @@ abstract type BellmanAlgorithm end struct OMaximization <: BellmanAlgorithm end Base.@kwdef struct LPMcCormickRelaxation{O} <: BellmanAlgorithm - lp_optimizer::O = HiGHS.Optimizer + lp_solver::O = HiGHS.Optimizer end struct VertexEnumeration <: BellmanAlgorithm end diff --git a/src/workspace.jl b/src/workspace.jl index aec37ef5..e8790cba 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -143,7 +143,7 @@ struct FactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: Abst end function FactoredIntervalMcCormickWorkspace(sys, alg) - model = JuMP.Model(alg.lp_optimizer) + model = JuMP.Model(alg.lp_solver) JuMP.set_silent(model) set_string_names_on_creation(model, false) From 7abb7cff7177ec7e03b14f704a3e046fe3f1944c Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 25 Sep 2025 17:39:44 +0200 Subject: [PATCH 38/71] Fix a todo in test/base/imdp.jl --- test/base/imdp.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/base/imdp.jl b/test/base/imdp.jl index a58489fd..f4440008 100644 --- a/test/base/imdp.jl +++ b/test/base/imdp.jl @@ -50,14 +50,21 @@ using IntervalMDP mdp = IntervalMarkovDecisionProcess(transition_probs) @testset "bellman" begin - # TODO: Add tests with min upper bound V = N[1, 2, 3] + Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] Vres = similar(Vres) IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + + Vres = IntervalMDP.bellman(V, mdp; upper_bound = true, maximize = false) + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) 
* 3, (1//5) * 1 + (2//5) * 2 + (2//5) * 3, 1 * 3] + + Vres = similar(Vres) + IntervalMDP.bellman!(Vres, V, mdp; upper_bound = true, maximize = false) + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (1//5) * 1 + (2//5) * 2 + (2//5) * 3, 1 * 3] end @testset "explicit sink state" begin From 582ca5f44c9212c2695fd21dcfc6353f94c99ef2 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 25 Sep 2025 17:41:32 +0200 Subject: [PATCH 39/71] Fix typo --- docs/src/specifications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 916e44b3..5acb76dc 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -20,7 +20,7 @@ spec = Specification(prop, Optimistic) # Explicit strategy mode (minimize/maxize) spec = Specification(prop, Pessimistic, Maximize) spec = Specification(prop, Pessimistic, Minimize) # Unusual, but available -spec = Specification(prop, Optimistic, Maximize) # Unusual, but avialable +spec = Specification(prop, Optimistic, Maximize) # Unusual, but available spec = Specification(prop, Optimistic, Minimize) ``` From c5a816990a8ad50210cf9811dc308687d74d9f80 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 25 Sep 2025 17:44:18 +0200 Subject: [PATCH 40/71] Fix math mode in documentation of complex properties --- docs/src/specifications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 5acb76dc..d83e379d 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -305,7 +305,7 @@ Given an fRMDP ``M = (S, S_0, A, \mathcal{G}, \Gamma)`` and a labeling function - ``Z = S \times Q`` is the set of product states, - ``Z_0 = S_0 \times \{q_0\}`` is the set of initial product states, - ``A`` is the set of actions, and -- ``\Gamma' = \{\Gamma_{z, a}}_{z \in Z, a \in A}`` is the joint ambiguity set defined as +- ``\Gamma' = \{\Gamma_{z, a}\}_{z \in 
Z, a \in A}`` is the joint ambiguity set defined as ```math \Gamma'_{z, a} = \{\gamma'_{z, a} \in \mathcal{D}(Z) : \exists \gamma_{s, a} \in \Gamma_{s, a} \text{ s.t. } \gamma'_{z, a}(z') = \mathbf{1}_{q'}(\delta(q, L(t))) \gamma_{s, a}(t)\} ``` From ee61bb00f7ebc4f07b3dd3f0cb48bb266bfa5393 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 25 Sep 2025 17:45:30 +0200 Subject: [PATCH 41/71] Clarify --- docs/src/specifications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/specifications.md b/docs/src/specifications.md index d83e379d..d6398cdf 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -301,7 +301,7 @@ convergence_threshold = 1e-8 prop = InfiniteTimeDFAReachability(accepting_states, convergence_threshold) ``` -Given an fRMDP ``M = (S, S_0, A, \mathcal{G}, \Gamma)`` and a labeling function ``L : S \to \Sigma`` that maps states of the fRMDP to symbols in the alphabet of the DFA, a path ``\omega = s_0 s_1 \ldots`` in the fRMDP produces a word ``L(s_0) L(s_1) \ldots`` that is accepted by the DFA. The probability of producing a path in the fRMDP that is accepted by the DFA can be expressed via the product construction ``M \otimes \mathcal{A} = (Z, Z_0, A, \Gamma')``, where +Given an fRMDP ``M = (S, S_0, A, \mathcal{G}, \Gamma)`` and a labeling function ``L : S \to \Sigma`` that maps states of the fRMDP to symbols in the alphabet of the DFA, a path ``\omega = s_0 s_1 \ldots`` in the fRMDP produces a word ``L(s_0) L(s_1) \ldots`` that is (possibly) accepted by the DFA. 
The probability of producing a path in the fRMDP that is accepted by the DFA can be expressed via the product construction ``M \otimes \mathcal{A} = (Z, Z_0, A, \Gamma')``, where - ``Z = S \times Q`` is the set of product states, - ``Z_0 = S_0 \times \{q_0\}`` is the set of initial product states, - ``A`` is the set of actions, and From 32c5c6734ac6f1cd6ee70b9de96403d2b2b07ad7 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Thu, 25 Sep 2025 18:19:59 +0200 Subject: [PATCH 42/71] Narrow the exposed interface --- docs/src/reference/solve.md | 1 + docs/src/reference/specifications.md | 29 +-- docs/src/reference/systems.md | 41 ++- src/Data/bmdp-tool.jl | 6 +- src/IntervalMDP.jl | 3 +- src/models/DFA.jl | 2 +- src/models/models.jl | 2 +- src/robust_value_iteration.jl | 4 +- src/specification.jl | 368 +++++++-------------------- test/base/specification.jl | 37 +-- 10 files changed, 140 insertions(+), 353 deletions(-) diff --git a/docs/src/reference/solve.md b/docs/src/reference/solve.md index 74b657c8..a4476aef 100644 --- a/docs/src/reference/solve.md +++ b/docs/src/reference/solve.md @@ -5,6 +5,7 @@ solve residual num_iterations value_function +strategy(res::IntervalMDP.ControlSynthesisSolution) StationaryStrategy TimeVaryingStrategy ``` diff --git a/docs/src/reference/specifications.md b/docs/src/reference/specifications.md index 798f90cd..1eb2504c 100644 --- a/docs/src/reference/specifications.md +++ b/docs/src/reference/specifications.md @@ -5,12 +5,9 @@ VerificationProblem ControlSynthesisProblem system specification -strategy +strategy(prob::VerificationProblem) Specification system_property -Property -BasicProperty -ProductProperty satisfaction_mode SatisfactionMode strategy_mode @@ -21,14 +18,10 @@ StrategyMode ```@docs FiniteTimeDFAReachability -isfinitetime(prop::FiniteTimeDFAReachability) -terminal_states(prop::FiniteTimeDFAReachability) reach(prop::FiniteTimeDFAReachability) time_horizon(prop::FiniteTimeDFAReachability) 
InfiniteTimeDFAReachability -isfinitetime(prop::InfiniteTimeDFAReachability) -terminal_states(prop::InfiniteTimeDFAReachability) reach(prop::InfiniteTimeDFAReachability) convergence_eps(prop::InfiniteTimeDFAReachability) ``` @@ -37,20 +30,14 @@ convergence_eps(prop::InfiniteTimeDFAReachability) ```@docs FiniteTimeReachability -isfinitetime(prop::FiniteTimeReachability) -terminal_states(prop::FiniteTimeReachability) reach(prop::FiniteTimeReachability) time_horizon(prop::FiniteTimeReachability) InfiniteTimeReachability -isfinitetime(prop::InfiniteTimeReachability) -terminal_states(prop::InfiniteTimeReachability) reach(prop::InfiniteTimeReachability) convergence_eps(prop::InfiniteTimeReachability) ExactTimeReachability -isfinitetime(prop::ExactTimeReachability) -terminal_states(prop::ExactTimeReachability) reach(prop::ExactTimeReachability) time_horizon(prop::ExactTimeReachability) ``` @@ -59,22 +46,16 @@ time_horizon(prop::ExactTimeReachability) ```@docs FiniteTimeReachAvoid -isfinitetime(prop::FiniteTimeReachAvoid) -terminal_states(prop::FiniteTimeReachAvoid) reach(prop::FiniteTimeReachAvoid) avoid(prop::FiniteTimeReachAvoid) time_horizon(prop::FiniteTimeReachAvoid) InfiniteTimeReachAvoid -isfinitetime(prop::InfiniteTimeReachAvoid) -terminal_states(prop::InfiniteTimeReachAvoid) reach(prop::InfiniteTimeReachAvoid) avoid(prop::InfiniteTimeReachAvoid) convergence_eps(prop::InfiniteTimeReachAvoid) ExactTimeReachAvoid -isfinitetime(prop::ExactTimeReachAvoid) -terminal_states(prop::ExactTimeReachAvoid) reach(prop::ExactTimeReachAvoid) avoid(prop::ExactTimeReachAvoid) time_horizon(prop::ExactTimeReachAvoid) @@ -84,14 +65,10 @@ time_horizon(prop::ExactTimeReachAvoid) ```@docs FiniteTimeSafety -isfinitetime(prop::FiniteTimeSafety) -terminal_states(prop::FiniteTimeSafety) avoid(prop::FiniteTimeSafety) time_horizon(prop::FiniteTimeSafety) InfiniteTimeSafety -isfinitetime(prop::InfiniteTimeSafety) -terminal_states(prop::InfiniteTimeSafety) avoid(prop::InfiniteTimeSafety) 
convergence_eps(prop::InfiniteTimeSafety) ``` @@ -100,13 +77,11 @@ convergence_eps(prop::InfiniteTimeSafety) ```@docs FiniteTimeReward -isfinitetime(prop::FiniteTimeReward) reward(prop::FiniteTimeReward) discount(prop::FiniteTimeReward) time_horizon(prop::FiniteTimeReward) InfiniteTimeReward -isfinitetime(prop::InfiniteTimeReward) reward(prop::InfiniteTimeReward) discount(prop::InfiniteTimeReward) convergence_eps(prop::InfiniteTimeReward) @@ -116,8 +91,6 @@ convergence_eps(prop::InfiniteTimeReward) ```@docs ExpectedExitTime -isfinitetime(prop::ExpectedExitTime) -terminal_states(prop::ExpectedExitTime) avoid(prop::ExpectedExitTime) convergence_eps(prop::ExpectedExitTime) ``` \ No newline at end of file diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 248dde6c..9d84faa1 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -1,10 +1,9 @@ # System representation ```@docs -IntervalMarkovProcess num_states num_actions -initial_states(mp::IntervalMarkovProcess) +initial_states AllStates ``` @@ -16,26 +15,12 @@ action_variables(s::FactoredRobustMarkovDecisionProcess) marginals(s::FactoredRobustMarkovDecisionProcess) ``` -## Convenience constructors for subclasses of fRMDPs +### Convenience constructors for subclasses of fRMDPs ```@docs IntervalMarkovChain IntervalMarkovDecisionProcess ``` -## Deterministic Finite Automaton (DFA) -```@docs -DFA -num_states(dfa::DFA) -num_labels(dfa::DFA) -transition(dfa::DFA) -labelmap(dfa::DFA) -initial_state(dfa::DFA) -ProductProcess -markov_process(proc::ProductProcess) -automaton(proc::ProductProcess) -labelling_function(proc::ProductProcess) -``` - ## Probability representation ```@docs Marginal @@ -61,11 +46,18 @@ upper gap ``` -### Labelling of IMDP states to Automaton alphabet +## Deterministic Finite Automaton (DFA) ```@docs -LabellingFunction -mapping(labelling_func::LabellingFunction) -num_labels(labelling_func::LabellingFunction) +DFA +num_states(dfa::DFA) 
+num_labels(dfa::DFA) +transition(dfa::DFA) +labelmap(dfa::DFA) +initial_state(dfa::DFA) +ProductProcess +markov_process(proc::ProductProcess) +automaton(proc::ProductProcess) +labelling_function(proc::ProductProcess) ``` ### Transition function for DFA @@ -74,4 +66,11 @@ TransitionFunction transition(transition_func::TransitionFunction) num_states(tf::TransitionFunction) num_labels(tf::TransitionFunction) +``` + +### Labelling of IMDP states to Automaton alphabet +```@docs +LabellingFunction +mapping(labelling_func::LabellingFunction) +num_labels(labelling_func::LabellingFunction) ``` \ No newline at end of file diff --git a/src/Data/bmdp-tool.jl b/src/Data/bmdp-tool.jl index 6f60e432..d72428ef 100644 --- a/src/Data/bmdp-tool.jl +++ b/src/Data/bmdp-tool.jl @@ -147,7 +147,7 @@ write_bmdp_tool_file(path, problem::IntervalMDP.AbstractIntervalMDPProblem) = """ write_bmdp_tool_file(path, mdp::IntervalMarkovProcess, spec::Specification) """ -write_bmdp_tool_file(path, mdp::IntervalMarkovProcess, spec::Specification) = +write_bmdp_tool_file(path, mdp::IntervalMDP.IntervalMarkovProcess, spec::Specification) = write_bmdp_tool_file(path, mdp, system_property(spec)) """ @@ -155,7 +155,7 @@ write_bmdp_tool_file(path, mdp::IntervalMarkovProcess, spec::Specification) = """ write_bmdp_tool_file( path, - mdp::IntervalMarkovProcess, + mdp::IntervalMDP.IntervalMarkovProcess, prop::IntervalMDP.AbstractReachability, ) = write_bmdp_tool_file(path, mdp, reach(prop)) @@ -164,7 +164,7 @@ write_bmdp_tool_file( """ write_bmdp_tool_file( path, - mdp::IntervalMarkovProcess, + mdp::IntervalMDP.IntervalMarkovProcess, terminal_states::Vector{T}, ) where {T} = write_bmdp_tool_file(path, mdp, CartesianIndex.(terminal_states)) diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 6af2d757..261e0a8a 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -34,8 +34,7 @@ export FiniteTimeSafety, InfiniteTimeSafety export FiniteTimeReward, InfiniteTimeReward export ExpectedExitTime -export 
isfinitetime -export reach, avoid, safe, terminal_states, time_horizon, convergence_eps, reward, discount +export reach, avoid, safe, time_horizon, convergence_eps, reward, discount export SatisfactionMode, Pessimistic, Optimistic, ispessimistic, isoptimistic export StrategyMode, Maximize, Minimize, ismaximize, isminimize diff --git a/src/models/DFA.jl b/src/models/DFA.jl index c98b2f7d..febbbcac 100644 --- a/src/models/DFA.jl +++ b/src/models/DFA.jl @@ -15,7 +15,7 @@ Formally, let ``(Q, 2^{AP}, \\delta, q_0, Q_{ac})`` be an DFA, where - ``\\delta : |Q| \\times |2^{AP}| => |Q|`` is the deterministic transition function, for each state-input pair. Then the `DFA` type is defined as follows: indices `1:num_states` are the states in ``Q``, -`transition` represents ``\\delta``, the set ``2^{AP}`` is , and `initial_states` is the set of initial states ``q_0``. +`transition` represents ``\\delta``, the set ``2^{AP}`` is , and `initial_state` is the initial state ``q_0``. See [`TransitionFunction`](@ref) for more information on the structure of the transition function. 
### Fields diff --git a/src/models/models.jl b/src/models/models.jl index 43e0cb92..5fe531e4 100644 --- a/src/models/models.jl +++ b/src/models/models.jl @@ -1,7 +1,7 @@ abstract type StochasticProcess end include("IntervalMarkovProcess.jl") -export IntervalMarkovProcess, AllStates +export AllStates export num_states, num_actions, initial_states include("FactoredRobustMarkovDecisionProcess.jl") diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index dca57e08..f6b6b126 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -66,9 +66,9 @@ transition_probs = [prob1, prob2, prob3] initial_state = 1 mdp = IntervalMarkovDecisionProcess(transition_probs, initial_state) -terminal_states = [3] +reach_states = [3] time_horizon = 10 -prop = FiniteTimeReachability(terminal_states, time_horizon) +prop = FiniteTimeReachability(reach_states, time_horizon) spec = Specification(prop, Pessimistic, Maximize) ### Verification diff --git a/src/specification.jl b/src/specification.jl index 9a5f0746..683e1a03 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -1,10 +1,5 @@ ### Property types -""" - Property - -Super type for all system Property -""" abstract type Property end function checkmodelpropertycompatibility(prop, system) @@ -17,24 +12,12 @@ end Base.show(io::IO, mime::MIME"text/plain", prop::Property) = showproperty(io, "", "", prop) -""" - BasicProperty - -A basic property that applies to a "raw" [`IntervalMarkovProcess`](@ref). -""" abstract type BasicProperty <: Property end - function checkmodelpropertycompatibility(::BasicProperty, ::IntervalMarkovProcess) return nothing end -""" - ProductProperty - -A property that applies to a [`ProductProcess`](@ref). 
-""" abstract type ProductProperty <: Property end - function checkmodelpropertycompatibility(::ProductProperty, ::ProductProcess) return nothing end @@ -128,13 +111,13 @@ the property is """ struct FiniteTimeDFAReachability{VT <: Vector{<:Int32}, T <: Integer} <: AbstractDFAReachability - terminal_states::VT + reach::VT time_horizon::T end -function FiniteTimeDFAReachability(terminal_states::Vector{<:Integer}, time_horizon) - terminal_states = Int32.(terminal_states) - return FiniteTimeDFAReachability(terminal_states, time_horizon) +function FiniteTimeDFAReachability(reach::Vector{<:Integer}, time_horizon) + reach = Int32.(reach) + return FiniteTimeDFAReachability(reach, time_horizon) end function checkproperty(prop::FiniteTimeDFAReachability, system, strategy) @@ -143,43 +126,30 @@ function checkproperty(prop::FiniteTimeDFAReachability, system, strategy) end function checkproperty(prop::FiniteTimeDFAReachability, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) end -""" - isfinitetime(prop::FiniteTimeDFAReachability) -Return `true` for FiniteTimeDFAReachability. -""" isfinitetime(prop::FiniteTimeDFAReachability) = true """ time_horizon(prop::FiniteTimeDFAReachability) -Return the time horizon of a finite time reachability property. +Return the time horizon of a finite time DFA reachability property. """ time_horizon(prop::FiniteTimeDFAReachability) = prop.time_horizon -""" - terminal_states(spec::FiniteTimeDFAReachability) - -Return the set of terminal states of a finite time reachability property. -""" -terminal_states(prop::FiniteTimeDFAReachability) = prop.terminal_states - """ reach(prop::FiniteTimeDFAReachability) -Return the set of states with which to compute reachbility for a finite time reachability prop. -This is equivalent for [`terminal_states(prop::FiniteTimeDFAReachability)`](@ref) for a DFA reachability -property. 
+Return the set of DFA states with respect to which to compute reachbility for a finite time DFA reachability property. """ -reach(prop::FiniteTimeDFAReachability) = prop.terminal_states +reach(prop::FiniteTimeDFAReachability) = prop.reach -function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeDFAReachability) +function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeDFAReachability) println(io, first_prefix, styled"{code:FiniteTimeDFAReachability}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end """ @@ -191,13 +161,13 @@ The convergence threshold is that the largest value of the most recent Bellman r """ struct InfiniteTimeDFAReachability{R <: Real, VT <: Vector{<:Int32}} <: AbstractDFAReachability - terminal_states::VT + reach::VT convergence_eps::R end -function InfiniteTimeDFAReachability(terminal_states::Vector{<:Integer}, convergence_eps) - terminal_states = Int32.(terminal_states) - return InfiniteTimeDFAReachability(terminal_states, convergence_eps) +function InfiniteTimeDFAReachability(reach::Vector{<:Integer}, convergence_eps) + reach = Int32.(reach) + return InfiniteTimeDFAReachability(reach, convergence_eps) end function checkproperty(prop::InfiniteTimeDFAReachability, system, strategy) @@ -206,43 +176,29 @@ function checkproperty(prop::InfiniteTimeDFAReachability, system, strategy) end function checkproperty(prop::InfiniteTimeDFAReachability, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) end -""" - isfinitetime(prop::InfiniteTimeDFAReachability) - -Return `false` for InfiniteTimeDFAReachability. 
-""" isfinitetime(prop::InfiniteTimeDFAReachability) = false """ convergence_eps(prop::InfiniteTimeDFAReachability) -Return the convergence threshold of an infinite time reachability property. +Return the convergence threshold of an infinite time DFA reachability property. """ convergence_eps(prop::InfiniteTimeDFAReachability) = prop.convergence_eps -""" - terminal_states(prop::InfiniteTimeDFAReachability) - -Return the set of terminal states of an infinite time reachability property. -""" -terminal_states(prop::InfiniteTimeDFAReachability) = prop.terminal_states - """ reach(prop::InfiniteTimeDFAReachability) -Return the set of states with which to compute reachbility for a infinite time reachability property. -This is equivalent for [`terminal_states(prop::InfiniteTimeDFAReachability)`](@ref) for a DFA reachability -property. +Return the set of DFA states with respect to which to compute reachbility for a infinite time DFA reachability property. """ -reach(prop::InfiniteTimeDFAReachability) = prop.terminal_states +reach(prop::InfiniteTimeDFAReachability) = prop.reach -function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeDFAReachability) +function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeDFAReachability) println(io, first_prefix, styled"{code:InfiniteTimeDFAReachability}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") - println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end ## Reachability @@ -276,13 +232,13 @@ the property is """ struct FiniteTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} <: AbstractReachability - terminal_states::VT + reach::VT time_horizon::T end -function FiniteTimeReachability(terminal_states::Vector{<:UnionIndex}, time_horizon) - terminal_states = 
CartesianIndex.(terminal_states) - return FiniteTimeReachability(terminal_states, time_horizon) +function FiniteTimeReachability(reach::Vector{<:UnionIndex}, time_horizon) + reach = CartesianIndex.(reach) + return FiniteTimeReachability(reach, time_horizon) end function checkproperty(prop::FiniteTimeReachability, system, strategy) @@ -291,14 +247,9 @@ function checkproperty(prop::FiniteTimeReachability, system, strategy) end function checkproperty(prop::FiniteTimeReachability, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) end -""" - isfinitetime(prop::FiniteTimeReachability) - -Return `true` for FiniteTimeReachability. -""" isfinitetime(prop::FiniteTimeReachability) = true """ @@ -308,27 +259,17 @@ Return the time horizon of a finite time reachability property. """ time_horizon(prop::FiniteTimeReachability) = prop.time_horizon -""" - terminal_states(spec::FiniteTimeReachability) - -Return the set of terminal states of a finite time reachability property. -""" -terminal_states(prop::FiniteTimeReachability) = prop.terminal_states - """ reach(prop::FiniteTimeReachability) -Return the set of states with which to compute reachbility for a finite time reachability prop. -This is equivalent for [`terminal_states(prop::FiniteTimeReachability)`](@ref) for a regular reachability -property. See [`FiniteTimeReachAvoid`](@ref) for a more complex property where the reachability and -terminal states differ. +Return the set of states with respect to which to compute reachbility for a finite time reachability property. 
""" -reach(prop::FiniteTimeReachability) = prop.terminal_states +reach(prop::FiniteTimeReachability) = prop.reach -function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeReachability) +function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeReachability) println(io, first_prefix, styled"{code:FiniteTimeReachability}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end """ @@ -340,13 +281,13 @@ The convergence threshold is that the largest value of the most recent Bellman r """ struct InfiniteTimeReachability{R <: Real, VT <: Vector{<:CartesianIndex}} <: AbstractReachability - terminal_states::VT + reach::VT convergence_eps::R end -function InfiniteTimeReachability(terminal_states::Vector{<:UnionIndex}, convergence_eps) - terminal_states = CartesianIndex.(terminal_states) - return InfiniteTimeReachability(terminal_states, convergence_eps) +function InfiniteTimeReachability(reach::Vector{<:UnionIndex}, convergence_eps) + reach = CartesianIndex.(reach) + return InfiniteTimeReachability(reach, convergence_eps) end function checkproperty(prop::InfiniteTimeReachability, system, strategy) @@ -355,14 +296,9 @@ function checkproperty(prop::InfiniteTimeReachability, system, strategy) end function checkproperty(prop::InfiniteTimeReachability, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) end -""" - isfinitetime(prop::InfiniteTimeReachability) - -Return `false` for InfiniteTimeReachability. -""" isfinitetime(prop::InfiniteTimeReachability) = false """ @@ -372,27 +308,17 @@ Return the convergence threshold of an infinite time reachability property. 
""" convergence_eps(prop::InfiniteTimeReachability) = prop.convergence_eps -""" - terminal_states(prop::InfiniteTimeReachability) - -Return the set of terminal states of an infinite time reachability property. -""" -terminal_states(prop::InfiniteTimeReachability) = prop.terminal_states - """ reach(prop::InfiniteTimeReachability) Return the set of states with which to compute reachbility for a infinite time reachability property. -This is equivalent for [`terminal_states(prop::InfiniteTimeReachability)`](@ref) for a regular reachability -property. See [`InfiniteTimeReachAvoid`](@ref) for a more complex property where the reachability and -terminal states differ. """ -reach(prop::InfiniteTimeReachability) = prop.terminal_states +reach(prop::InfiniteTimeReachability) = prop.reach -function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeReachability) +function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReachability) println(io, first_prefix, styled"{code:InfiniteTimeReachability}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") - println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end """ @@ -407,13 +333,13 @@ the property is """ struct ExactTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} <: AbstractReachability - terminal_states::VT + reach::VT time_horizon::T end -function ExactTimeReachability(terminal_states::Vector{<:UnionIndex}, time_horizon) - terminal_states = CartesianIndex.(terminal_states) - return ExactTimeReachability(terminal_states, time_horizon) +function ExactTimeReachability(reach::Vector{<:UnionIndex}, time_horizon) + reach = CartesianIndex.(reach) + return ExactTimeReachability(reach, time_horizon) end function checkproperty(prop::ExactTimeReachability, system, strategy) @@ 
-422,18 +348,13 @@ function checkproperty(prop::ExactTimeReachability, system, strategy) end function checkproperty(prop::ExactTimeReachability, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) end function step_postprocess_value_function!(_, ::ExactTimeReachability) return nothing end -""" - isfinitetime(prop::ExactTimeReachability) - -Return `true` for ExactTimeReachability. -""" isfinitetime(prop::ExactTimeReachability) = true """ @@ -443,27 +364,17 @@ Return the time horizon of an exact time reachability property. """ time_horizon(prop::ExactTimeReachability) = prop.time_horizon -""" - terminal_states(spec::ExactTimeReachability) - -Return the set of terminal states of an exact time reachability property. -""" -terminal_states(prop::ExactTimeReachability) = prop.terminal_states - """ reach(prop::ExactTimeReachability) Return the set of states with which to compute reachbility for an exact time reachability prop. -This is equivalent for [`terminal_states(prop::ExactTimeReachability)`](@ref) for a regular reachability -property. See [`ExactTimeReachAvoid`](@ref) for a more complex property where the reachability and -terminal states differ. 
""" -reach(prop::ExactTimeReachability) = prop.terminal_states +reach(prop::ExactTimeReachability) = prop.reach -function showproperty(io::IO, first_prefix, prefix, spec::ExactTimeReachability) +function showproperty(io::IO, first_prefix, prefix, prop::ExactTimeReachability) println(io, first_prefix, styled"{code:ExactTimeReachability}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"└─ Reach states: {magenta:$(reach(spec))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end ## Reach-avoid @@ -545,15 +456,11 @@ function checkproperty(prop::FiniteTimeReachAvoid, system, strategy) end function checkproperty(prop::FiniteTimeReachAvoid, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) + checkstatebounds(avoid(prop), system) checkdisjoint(reach(prop), avoid(prop)) end -""" - isfinitetime(prop::FiniteTimeReachAvoid) - -Return `true` for FiniteTimeReachAvoid. -""" isfinitetime(prop::FiniteTimeReachAvoid) = true """ @@ -563,14 +470,6 @@ Return the time horizon of a finite time reach-avoid property. """ time_horizon(prop::FiniteTimeReachAvoid) = prop.time_horizon -""" - terminal_states(prop::FiniteTimeReachAvoid) - -Return the set of terminal states of a finite time reach-avoid property. -That is, the union of the reach and avoid sets. -""" -terminal_states(prop::FiniteTimeReachAvoid) = [prop.reach; prop.avoid] - """ reach(prop::FiniteTimeReachAvoid) @@ -585,11 +484,11 @@ Return the set of states to avoid. 
""" avoid(prop::FiniteTimeReachAvoid) = prop.avoid -function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeReachAvoid) +function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeReachAvoid) println(io, first_prefix, styled"{code:FiniteTimeReachAvoid}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"├─ Reach states: {magenta:$(reach(spec))}") - println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"├─ Reach states: {magenta:$(reach(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end """ @@ -620,15 +519,11 @@ function checkproperty(prop::InfiniteTimeReachAvoid, system, strategy) end function checkproperty(prop::InfiniteTimeReachAvoid, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) + checkstatebounds(avoid(prop), system) checkdisjoint(reach(prop), avoid(prop)) end -""" - isfinitetime(prop::InfiniteTimeReachAvoid) - -Return `false` for InfiniteTimeReachAvoid. -""" isfinitetime(prop::InfiniteTimeReachAvoid) = false """ @@ -638,14 +533,6 @@ Return the convergence threshold of an infinite time reach-avoid property. """ convergence_eps(prop::InfiniteTimeReachAvoid) = prop.convergence_eps -""" - terminal_states(prop::InfiniteTimeReachAvoid) - -Return the set of terminal states of an infinite time reach-avoid property. -That is, the union of the reach and avoid sets. -""" -terminal_states(prop::InfiniteTimeReachAvoid) = [prop.reach; prop.avoid] - """ reach(prop::InfiniteTimeReachAvoid) @@ -660,11 +547,11 @@ Return the set of states to avoid. 
""" avoid(prop::InfiniteTimeReachAvoid) = prop.avoid -function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeReachAvoid) +function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReachAvoid) println(io, first_prefix, styled"{code:InfiniteTimeReachAvoid}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") - println(io, prefix, styled"├─ Reach states: {magenta:$(reach(spec))}") - println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println(io, prefix, styled"├─ Reach states: {magenta:$(reach(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end """ @@ -700,7 +587,8 @@ function checkproperty(prop::ExactTimeReachAvoid, system, strategy) end function checkproperty(prop::ExactTimeReachAvoid, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(reach(prop), system) + checkstatebounds(avoid(prop), system) checkdisjoint(reach(prop), avoid(prop)) end @@ -708,11 +596,6 @@ function step_postprocess_value_function!(value_function, prop::ExactTimeReachAv @inbounds value_function.current[avoid(prop)] .= 0.0 end -""" - isfinitetime(prop::ExactTimeReachAvoid) - -Return `true` for ExactTimeReachAvoid. -""" isfinitetime(prop::ExactTimeReachAvoid) = true """ @@ -722,14 +605,6 @@ Return the time horizon of an exact time reach-avoid property. """ time_horizon(prop::ExactTimeReachAvoid) = prop.time_horizon -""" - terminal_states(prop::ExactTimeReachAvoid) - -Return the set of terminal states of an exact time reach-avoid property. -That is, the union of the reach and avoid sets. -""" -terminal_states(prop::ExactTimeReachAvoid) = [prop.reach; prop.avoid] - """ reach(prop::ExactTimeReachAvoid) @@ -744,11 +619,11 @@ Return the set of states to avoid. 
""" avoid(prop::ExactTimeReachAvoid) = prop.avoid -function showproperty(io::IO, first_prefix, prefix, spec::ExactTimeReachAvoid) +function showproperty(io::IO, first_prefix, prefix, prop::ExactTimeReachAvoid) println(io, first_prefix, styled"{code:ExactTimeReachAvoid}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"├─ Reach states: {magenta:$(reach(spec))}") - println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"├─ Reach states: {magenta:$(reach(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end ## Safety @@ -783,13 +658,13 @@ the property is ``` """ struct FiniteTimeSafety{VT <: Vector{<:CartesianIndex}, T <: Integer} <: AbstractSafety - avoid_states::VT + avoid::VT time_horizon::T end -function FiniteTimeSafety(avoid_states::Vector{<:UnionIndex}, time_horizon) - avoid_states = CartesianIndex.(avoid_states) - return FiniteTimeSafety(avoid_states, time_horizon) +function FiniteTimeSafety(avoid::Vector{<:UnionIndex}, time_horizon) + avoid = CartesianIndex.(avoid) + return FiniteTimeSafety(avoid, time_horizon) end function checkproperty(prop::FiniteTimeSafety, system, strategy) @@ -798,14 +673,9 @@ function checkproperty(prop::FiniteTimeSafety, system, strategy) end function checkproperty(prop::FiniteTimeSafety, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(avoid(prop), system) end -""" - isfinitetime(prop::FiniteTimeSafety) - -Return `true` for FiniteTimeSafety. -""" isfinitetime(prop::FiniteTimeSafety) = true """ @@ -815,25 +685,17 @@ Return the time horizon of a finite time safety property. """ time_horizon(prop::FiniteTimeSafety) = prop.time_horizon -""" - terminal_states(spec::FiniteTimeSafety) - -Return the set of terminal states of a finite time safety property. 
-""" -terminal_states(prop::FiniteTimeSafety) = prop.avoid_states - """ avoid(prop::FiniteTimeSafety) Return the set of states with which to compute reachbility for a finite time reachability prop. -This is equivalent for [`terminal_states(prop::FiniteTimeSafety)`](@ref). """ -avoid(prop::FiniteTimeSafety) = prop.avoid_states +avoid(prop::FiniteTimeSafety) = prop.avoid -function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeSafety) +function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeSafety) println(io, first_prefix, styled"{code:FiniteTimeSafety}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end """ @@ -844,13 +706,13 @@ In practice it means, performing the value iteration until the value function ha The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. 
""" struct InfiniteTimeSafety{R <: Real, VT <: Vector{<:CartesianIndex}} <: AbstractSafety - avoid_states::VT + avoid::VT convergence_eps::R end -function InfiniteTimeSafety(avoid_states::Vector{<:UnionIndex}, convergence_eps) - avoid_states = CartesianIndex.(avoid_states) - return InfiniteTimeSafety(avoid_states, convergence_eps) +function InfiniteTimeSafety(avoid::Vector{<:UnionIndex}, convergence_eps) + avoid = CartesianIndex.(avoid) + return InfiniteTimeSafety(avoid, convergence_eps) end function checkproperty(prop::InfiniteTimeSafety, system, strategy) @@ -859,14 +721,9 @@ function checkproperty(prop::InfiniteTimeSafety, system, strategy) end function checkproperty(prop::InfiniteTimeSafety, system) - checkstatebounds(terminal_states(prop), system) + checkstatebounds(avoid(prop), system) end -""" - isfinitetime(prop::InfiniteTimeSafety) - -Return `false` for InfiniteTimeSafety. -""" isfinitetime(prop::InfiniteTimeSafety) = false """ @@ -876,25 +733,17 @@ Return the convergence threshold of an infinite time safety property. """ convergence_eps(prop::InfiniteTimeSafety) = prop.convergence_eps -""" - terminal_states(prop::InfiniteTimeSafety) - -Return the set of terminal states of an infinite time safety property. -""" -terminal_states(prop::InfiniteTimeSafety) = prop.avoid_states - """ avoid(prop::InfiniteTimeSafety) Return the set of states with which to compute safety for a infinite time safety property. -This is equivalent for [`terminal_states(prop::InfiniteTimeSafety)`](@ref). 
""" -avoid(prop::InfiniteTimeSafety) = prop.avoid_states +avoid(prop::InfiniteTimeSafety) = prop.avoid -function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeSafety) +function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeSafety) println(io, first_prefix, styled"{code:InfiniteTimeSafety}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") - println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end ## Reward @@ -959,11 +808,6 @@ function checkproperty(prop::FiniteTimeReward, system) checkreward(prop, system) end -""" - isfinitetime(prop::FiniteTimeReward) - -Return `true` for FiniteTimeReward. -""" isfinitetime(prop::FiniteTimeReward) = true """ @@ -987,11 +831,11 @@ Return the time horizon of a finite time reward optimization. """ time_horizon(prop::FiniteTimeReward) = prop.time_horizon -function showproperty(io::IO, first_prefix, prefix, spec::FiniteTimeReward) +function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeReward) println(io, first_prefix, styled"{code:FiniteTimeReward}") - println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(spec))}") - println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(spec))}") - println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(spec))), $(size(reward(spec)))}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(prop))}") + println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(prop))), $(size(reward(prop)))}") end """ @@ -1028,11 +872,6 @@ function checkdiscountupperbound(prop::InfiniteTimeReward) end end -""" - isfinitetime(prop::InfiniteTimeReward) - -Return `false` for InfiniteTimeReward. 
-""" isfinitetime(prop::InfiniteTimeReward) = false """ @@ -1056,11 +895,11 @@ Return the convergence threshold of an infinite time reward optimization. """ convergence_eps(prop::InfiniteTimeReward) = prop.convergence_eps -function showproperty(io::IO, first_prefix, prefix, spec::InfiniteTimeReward) +function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReward) println(io, first_prefix, styled"{code:InfiniteTimeReward}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") - println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(spec))}") - println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(spec))), $(size(reward(spec)))}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(prop))}") + println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(prop))), $(size(reward(prop)))}") end ## Hitting time @@ -1117,25 +956,12 @@ function step_postprocess_value_function!(value_function, prop::ExpectedExitTime value_function.current[avoid(prop)] .= 0.0 end -""" - isfinitetime(prop::ExpectedExitTime) - -Return `true` for ExpectedExitTime. -""" isfinitetime(prop::ExpectedExitTime) = false -""" - terminal_states(prop::ExpectedExitTime) - -Return the set of terminal states of an expected hitting time property. -""" -terminal_states(prop::ExpectedExitTime) = prop.avoid_states - """ avoid(prop::ExpectedExitTime) Return the set of unsafe states that we compute the expected hitting time with respect to. -This is equivalent for [`terminal_states(prop::ExpectedExitTime)`](@ref). """ avoid(prop::ExpectedExitTime) = prop.avoid_states @@ -1146,10 +972,10 @@ Return the convergence threshold of an expected exit time. 
""" convergence_eps(prop::ExpectedExitTime) = prop.convergence_eps -function showproperty(io::IO, first_prefix, prefix, spec::ExpectedExitTime) +function showproperty(io::IO, first_prefix, prefix, prop::ExpectedExitTime) println(io, first_prefix, styled"{code:ExpectedExitTime}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(spec))}") - println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(spec))}") + println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end ## Problem diff --git a/test/base/specification.jl b/test/base/specification.jl index cebbec5a..8fee8601 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -4,95 +4,85 @@ using IntervalMDP @testset "getters" begin @testset "DFA reachability" begin prop = FiniteTimeDFAReachability([3], 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [3] - @test terminal_states(prop) == [3] prop = InfiniteTimeDFAReachability([3], 1e-6) - @test !isfinitetime(prop) + @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reach(prop) == [3] - @test terminal_states(prop) == [3] end @testset "reachability" begin prop = FiniteTimeReachability([3], 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [CartesianIndex(3)] - @test terminal_states(prop) == [CartesianIndex(3)] prop = InfiniteTimeReachability([3], 1e-6) - @test !isfinitetime(prop) + @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reach(prop) == [CartesianIndex(3)] - @test terminal_states(prop) == [CartesianIndex(3)] prop = ExactTimeReachability([3], 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [CartesianIndex(3)] - @test 
terminal_states(prop) == [CartesianIndex(3)] end @testset "reach-avoid" begin prop = FiniteTimeReachAvoid([3], [4], 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [CartesianIndex(3)] @test avoid(prop) == [CartesianIndex(4)] - @test issetequal(terminal_states(prop), [CartesianIndex(3), CartesianIndex(4)]) prop = InfiniteTimeReachAvoid([3], [4], 1e-6) - @test !isfinitetime(prop) + @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reach(prop) == [CartesianIndex(3)] @test avoid(prop) == [CartesianIndex(4)] - @test issetequal(terminal_states(prop), [CartesianIndex(3), CartesianIndex(4)]) prop = ExactTimeReachAvoid([3], [4], 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [CartesianIndex(3)] @test avoid(prop) == [CartesianIndex(4)] - @test issetequal(terminal_states(prop), [CartesianIndex(3), CartesianIndex(4)]) end @testset "safety" begin prop = FiniteTimeSafety([3], 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test avoid(prop) == [CartesianIndex(3)] - @test terminal_states(prop) == [CartesianIndex(3)] prop = InfiniteTimeSafety([3], 1e-6) - @test !isfinitetime(prop) + @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test avoid(prop) == [CartesianIndex(3)] - @test terminal_states(prop) == [CartesianIndex(3)] end @testset "reward" begin prop = FiniteTimeReward([1.0, 2.0, 3.0], 0.9, 10) - @test isfinitetime(prop) + @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reward(prop) == [1.0, 2.0, 3.0] @test discount(prop) == 0.9 prop = InfiniteTimeReward([1.0, 2.0, 3.0], 0.9, 1e-6) - @test !isfinitetime(prop) + @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reward(prop) == [1.0, 2.0, 3.0] @@ -101,11 +91,10 @@ using IntervalMDP @testset "expected exit time" begin 
prop = ExpectedExitTime([3], 1e-6) - @test !isfinitetime(prop) + @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test avoid(prop) == [CartesianIndex(3)] - @test terminal_states(prop) == [CartesianIndex(3)] end end From 76985d86279a4f6950443374394a93044e7214c5 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 27 Sep 2025 20:27:18 +0200 Subject: [PATCH 43/71] WIP: Adding doctests --- docs/make.jl | 2 +- docs/src/developer.md | 5 + docs/src/reference/systems.md | 8 +- ext/IntervalMDPCudaExt.jl | 8 +- src/bellman.jl | 139 +----------------- .../FactoredRobustMarkovDecisionProcess.jl | 26 ++-- src/models/ProductProcess.jl | 8 +- src/models/models.jl | 2 +- src/probabilities/IntervalAmbiguitySets.jl | 47 +++--- src/probabilities/Marginal.jl | 4 +- src/robust_value_iteration.jl | 93 ++++++++++-- src/specification.jl | 4 +- src/strategy_cache.jl | 4 +- src/workspace.jl | 4 +- test/data/bmdp_tool.jl | 8 +- test/data/intervalmdp.jl | 4 +- test/data/prism.jl | 12 +- 17 files changed, 165 insertions(+), 213 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index bef6d9b9..7efa12ee 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -36,7 +36,7 @@ makedocs(; "Developer docs" => "developer.md", "References" => "references.md", ], - doctest = false, + doctest = true, checkdocs = :exports, plugins = [bib], ) diff --git a/docs/src/developer.md b/docs/src/developer.md index 59e7f217..925ea150 100644 --- a/docs/src/developer.md +++ b/docs/src/developer.md @@ -1,5 +1,10 @@ # Developer documentation +## Dense matrix vs sparse matrix vs BDD/ADD + +!!! 
todo + Describe the details and choice + ## Bellman algorithms ### [O-maximization](@id dev-docs-omax) To optimize the procedure, we abstract the O-maximization algorithm into the sorting phase and the O-maximization phase: diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 9d84faa1..9846ffb9 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -10,8 +10,8 @@ AllStates ## [Factored RMDPs](@id api-frmdp) ```@docs FactoredRobustMarkovDecisionProcess -state_variables(s::FactoredRobustMarkovDecisionProcess) -action_variables(s::FactoredRobustMarkovDecisionProcess) +state_values(s::FactoredRobustMarkovDecisionProcess) +action_values(s::FactoredRobustMarkovDecisionProcess) marginals(s::FactoredRobustMarkovDecisionProcess) ``` @@ -29,18 +29,16 @@ state_variables(m::Marginal) action_variables(m::Marginal) source_shape(m::Marginal) action_shape(m::Marginal) -num_target(m::Marginal) getindex(p::Marginal, action, source) num_sets +num_target support ``` ### Interval ambiguity sets ```@docs IntervalAmbiguitySets -num_sets(p::IntervalAmbiguitySets) -num_target(p::IntervalAmbiguitySets) lower upper gap diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl index 02e25fc2..f4a45798 100644 --- a/ext/IntervalMDPCudaExt.jl +++ b/ext/IntervalMDPCudaExt.jl @@ -21,8 +21,8 @@ function Adapt.adapt_structure( mdp::IntervalMDP.FactoredRMDP, ) return IntervalMDP.FactoredRMDP( - state_variables(mdp), - action_variables(mdp), + state_values(mdp), + action_values(mdp), IntervalMDP.source_shape(mdp), adapt(T, marginals(mdp)), adapt(CuArray{Int32}, initial_states(mdp)), @@ -35,8 +35,8 @@ function Adapt.adapt_structure( mdp::IntervalMDP.FactoredRMDP, ) return IntervalMDP.FactoredRMDP( - state_variables(mdp), - action_variables(mdp), + state_values(mdp), + action_values(mdp), IntervalMDP.source_shape(mdp), adapt(T, marginals(mdp)), adapt(Array{Int32}, initial_states(mdp)), diff --git a/src/bellman.jl b/src/bellman.jl index 
01037f05..9aa356b6 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -1,142 +1,9 @@ -""" - bellman(V, model; upper_bound = false, maximize = true) - -Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref), -that upper or lower bounds the expectation of the value function `V` via O-maximization [1]. -Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument. -That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower -bound is computed. - -### Examples -```jldoctest -prob1 = IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], -) - -prob2 = IntervalProbabilities(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], -) - -prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 - 1.0 -][:, :], upper = [ - 0.0 - 0.0 - 1.0 -][:, :]) - -transition_probs = [prob1, prob2, prob3] -istates = [Int32(1)] - -model = IntervalMarkovDecisionProcess(transition_probs, istates) - -Vprev = [1, 2, 3] -Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) -``` - -!!! note - This function will construct a workspace object and an output vector. - For a hot-loop, it is more efficient to use `bellman!` and pass in pre-allocated objects. - -[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. 
- -""" function bellman(V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) Vres = similar(V, source_shape(model)) return bellman!(Vres, V, model, alg; upper_bound = upper_bound, maximize = maximize) end -""" - bellman!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true) - -Compute in-place robust Bellman update with the value function `V` and the model `model`, -e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V` via O-maximization [1]. -Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument. -That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower -bound is computed. - -The output is constructed in the input `Vres` and returned. The workspace object is also modified, -and depending on the type, the strategy cache may be modified as well. See [`construct_workspace`](@ref) -and [`construct_strategy_cache`](@ref) for more details on how to pre-allocate the workspace and strategy cache. - -### Examples - -```jldoctest -prob1 = IntervalProbabilities(; - lower = [ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ], - upper = [ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ], -) - -prob2 = IntervalProbabilities(; - lower = [ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ], - upper = [ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ], -) - -prob3 = IntervalProbabilities(; lower = [ - 0.0 - 0.0 - 1.0 -][:, :], upper = [ - 0.0 - 0.0 - 1.0 -][:, :]) - -transition_probs = [prob1, prob2, prob3] -istates = [Int32(1)] - -model = IntervalMarkovDecisionProcess(transition_probs, istates) - -V = [1, 2, 3] -workspace = construct_workspace(model) -strategy_cache = construct_strategy_cache(model) -Vres = similar(V) - -Vres = IntervalMDP.bellman!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true) -``` - -[1] M. Lahijanian, S. B. Andersson and C. 
Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883. - -""" -function bellman! end - function bellman!(Vres, V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) workspace = construct_workspace(model, alg) strategy_cache = construct_strategy_cache(model) @@ -192,7 +59,7 @@ function bellman!( # Select the value function for the current DFA state # according to the appropriate DFA transition function - map!(W, CartesianIndices(state_variables(mp))) do idx + map!(W, CartesianIndices(state_values(mp))) do idx return V[idx, dfa[state, lf[idx]]] end @@ -702,7 +569,7 @@ Base.@propagate_inbounds function state_action_bellman( Vₑ = workspace.expectation_cache # For each higher-level state in the product space - for I in CartesianIndices(state_variables(model)[2:end]) + for I in CartesianIndices(state_values(model)[2:end]) # For the first dimension, we need to copy the values from V v = orthogonal_inner_bellman!( workspace, @@ -715,7 +582,7 @@ Base.@propagate_inbounds function state_action_bellman( # For the remaining dimensions, if "full", compute expectation and store in the next level for d in 2:(length(ambiguity_sets) - 1) - if I[d - 1] == state_variables(model, d) + if I[d - 1] == state_values(model, d) v = orthogonal_inner_bellman!( workspace, Vₑ[d - 1], diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index e0e979d3..cc1f410c 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -85,13 +85,13 @@ function FactoredRMDP( end function check_rmdp(state_vars, action_vars, source_dims, transition, initial_states) - check_state_variables(state_vars, source_dims) - check_action_variables(action_vars) + check_state_values(state_vars, source_dims) + 
check_action_values(action_vars) check_transition(state_vars, action_vars, source_dims, transition) check_initial_states(state_vars, initial_states) end -function check_state_variables(state_vars, source_dims) +function check_state_values(state_vars, source_dims) if any(n -> n <= 0, state_vars) throw(ArgumentError("All state variables must be positive integers.")) end @@ -101,7 +101,7 @@ function check_state_variables(state_vars, source_dims) end end -function check_action_variables(action_vars) +function check_action_values(action_vars) if any(x -> x <= 0, action_vars) throw(ArgumentError("All action variables must be positive integers.")) end @@ -143,19 +143,19 @@ function check_initial_states(state_vars, initial_states) end """ - state_variables(mdp::FactoredRMDP) + state_values(mdp::FactoredRMDP) Return a tuple with the number of states for each state variable in the fRMDP. """ -state_variables(mdp::FactoredRMDP) = mdp.state_vars -state_variables(mdp::FactoredRMDP, r) = mdp.state_vars[r] +state_values(mdp::FactoredRMDP) = mdp.state_vars +state_values(mdp::FactoredRMDP, r) = mdp.state_vars[r] """ - action_variables(mdp::FactoredRMDP) + action_values(mdp::FactoredRMDP) Return a tuple with the number of actions for each action variable in the fRMDP. """ -action_variables(mdp::FactoredRMDP) = mdp.action_vars +action_values(mdp::FactoredRMDP) = mdp.action_vars """ marginals(mdp::FactoredRMDP) @@ -164,8 +164,8 @@ Return the marginals of the fRMDP. 
""" marginals(mdp::FactoredRMDP) = mdp.transition -num_states(mdp::FactoredRMDP) = prod(state_variables(mdp)) -num_actions(mdp::FactoredRMDP) = prod(action_variables(mdp)) +num_states(mdp::FactoredRMDP) = prod(state_values(mdp)) +num_actions(mdp::FactoredRMDP) = prod(action_values(mdp)) initial_states(mdp::FactoredRMDP) = mdp.initial_states source_shape(m::FactoredRMDP) = m.source_dims @@ -213,8 +213,8 @@ end function showsystem(io::IO, first_prefix, prefix, mdp::FactoredRMDP{N, M}) where {N, M} println(io, first_prefix, styled"{code:FactoredRobustMarkovDecisionProcess}") - println(io, prefix, "├─ ", N, styled" state variables with cardinality: {magenta:$(state_variables(mdp))}") - println(io, prefix, "├─ ", M, styled" action variables with cardinality: {magenta:$(action_variables(mdp))}") + println(io, prefix, "├─ ", N, styled" state variables with cardinality: {magenta:$(state_values(mdp))}") + println(io, prefix, "├─ ", M, styled" action variables with cardinality: {magenta:$(action_values(mdp))}") if initial_states(mdp) isa AllStates println(io, prefix, "├─ ", styled"Initial states: {magenta:All states}") else diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index 16b8f0b7..fa205b39 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -55,10 +55,10 @@ function checkproduct( ) # check labelling states (input) match MDP states - if size(labelling_func) != state_variables(mdp) + if size(labelling_func) != state_values(mdp) throw( DimensionMismatch( - "The mapped states $(size(labelling_func)) in the labelling function is not equal the fRMDP state variables $(state_variables(mdp)).", + "The mapped states $(size(labelling_func)) in the labelling function is not equal the fRMDP state variables $(state_values(mdp)).", ), ) end @@ -94,9 +94,9 @@ Return the labelling function of the product """ labelling_function(proc::ProductProcess) = proc.labelling_func -state_variables(proc::ProductProcess) = 
(state_variables(markov_process(proc))..., num_states(automaton(proc))) +state_values(proc::ProductProcess) = (state_values(markov_process(proc))..., num_states(automaton(proc))) source_shape(proc::ProductProcess) = (source_shape(markov_process(proc))..., num_states(automaton(proc))) -action_variables(proc::ProductProcess) = action_variables(markov_process(proc)) +action_values(proc::ProductProcess) = action_values(markov_process(proc)) action_shape(proc::ProductProcess) = action_shape(markov_process(proc)) Base.show(io::IO, proc::ProductProcess) = showsystem(io, "", "", proc) diff --git a/src/models/models.jl b/src/models/models.jl index 5fe531e4..bd3b29e9 100644 --- a/src/models/models.jl +++ b/src/models/models.jl @@ -5,7 +5,7 @@ export AllStates export num_states, num_actions, initial_states include("FactoredRobustMarkovDecisionProcess.jl") -export FactoredRobustMarkovDecisionProcess, state_variables, action_variables, marginals +export FactoredRobustMarkovDecisionProcess, state_values, action_values, marginals # Convenience model constructors - they all return a FactoredRobustMarkovDecisionProcess include("IntervalMarkovChain.jl") diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 027ba66f..3d135b6a 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -4,27 +4,40 @@ !!! todo Update description -A matrix pair to represent the lower and upper bound transition probabilities from all source/action pairs to all target states. -The matrices can be `Matrix{R}` or `SparseMatrixCSC{R}`, or their CUDA equivalents. For memory efficiency, it is recommended to use sparse matrices. +A matrix pair to represent the lower and upper bound of `num_sets(ambiguity_set)` interval ambiguity sets (on the columns) +to `num_target(ambiguity_set)` destinations (on the rows). [Marginal](@ref) adds interpretation to the column indices. 
+The matrices can be `Matrix{R}` or `SparseMatrixCSC{R}`, or their CUDA equivalents. +Due to the space complexity, if modelling [IntervalMarkovChains](@ref IntervalMarkovChain) or [IntervalMarkovDecisionProcesses](@ref IntervalMarkovDecisionProcess), +it is recommended to use sparse matrices. -The columns represent the source and the rows represent the target, as if the probability matrix was a linear transformation. -Mathematically, let ``P`` be the probability matrix. Then ``P_{ij}`` represents the probability of transitioning from state ``j`` (or with state/action pair ``j``) to state ``i``. -Due to the column-major format of Julia, this is also a more efficient representation (in terms of cache locality). +The columns represent the different ambiguity sets and the rows represent the targets. Due to the column-major format of Julia, +this is a more efficient representation in terms of cache locality. The lower bound is explicitly stored, while the upper bound is computed from the lower bound and the gap. This choice is because it simplifies repeated probability assignment using O-maximization [givan2000bounded, lahijanian2015formal](@cite). ### Fields -- `lower::MR`: The lower bound transition probabilities from a source state or source/action pair to a target state. -- `gap::MR`: The gap between upper and lower bound transition probabilities from a source state or source/action pair to a target state. +- `lower::MR`: The lower bound probabilities for `num_sets(ambiguity_set)` ambiguity sets to `num_target(ambiguity_set)` target states. +- `gap::MR`: The gap between upper and lower bound transition probabilities for `num_sets(ambiguity_set)` ambiguity sets to `num_target(ambiguity_set)` target states. 
### Examples ```jldoctest +using IntervalMDP, StyledStrings # hide dense_prob = IntervalAmbiguitySets(; lower = [0.0 0.5; 0.1 0.3; 0.2 0.1], upper = [0.5 0.7; 0.6 0.5; 0.7 0.3], ) +# output + +IntervalAmbiguitySets +├─ Storage type: Matrix{Float64} +├─ Number of target states: 3 +└─ Number of ambiguity sets: 2 +``` + +```jldoctest +using IntervalMDP, StyledStrings, SparseArrays # hide sparse_prob = IntervalAmbiguitySets(; lower = sparse_hcat( SparseVector(15, [4, 10], [0.1, 0.2]), @@ -35,6 +48,15 @@ sparse_prob = IntervalAmbiguitySets(; SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]), ), ) + +# output + +IntervalAmbiguitySets +├─ Storage type: SparseArrays.FixedSparseCSC{Float64, Int64} +├─ Number of target states: 15 +├─ Number of ambiguity sets: 2 +├─ Maximum support size: 3 +└─ Number of non-zeros: 6 ``` """ @@ -171,18 +193,7 @@ function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMat end end -""" - num_target(ambiguity_set::IntervalAmbiguitySets) - -Return the number of target states in the IntervalAmbiguitySets object. -""" num_target(p::IntervalAmbiguitySets) = size(p.lower, 1) - -""" - num_sets(ambiguity_set::IntervalAmbiguitySets) - -Return the number of ambiguity sets in the IntervalAmbiguitySets object. -""" num_sets(p::IntervalAmbiguitySets) = size(p.lower, 2) source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 1d446d15..46ed313b 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -111,7 +111,7 @@ action_variables(p::Marginal) = p.action_indices source_shape(p::Marginal) Return the shape of the source (state) variables of the marginal. The [`FactoredRobustMarkovDecisionProcess`](@ref) -checks if this is less than or equal to the corresponding state variables. +checks if this is less than or equal to the corresponding state values. 
""" source_shape(p::Marginal) = p.source_dims @@ -119,7 +119,7 @@ source_shape(p::Marginal) = p.source_dims action_shape(p::Marginal) Return the shape of the action variables of the marginal. The [`FactoredRobustMarkovDecisionProcess`](@ref) -checks if this is equal to the corresponding action variables. +checks if this is equal to the corresponding action values. """ action_shape(p::Marginal) = p.action_vars diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index f6b6b126..b7dfdae9 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -30,8 +30,9 @@ iteration count. The callback function should have the signature `callback(V::Ab ### Examples -```jldoctest -prob1 = IntervalProbabilities(; +```jldoctest robust_vi +using IntervalMDP # hide +prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 0.1 0.3 @@ -44,7 +45,7 @@ prob1 = IntervalProbabilities(; ], ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = [ 0.1 0.2 0.2 0.3 @@ -57,29 +58,99 @@ prob2 = IntervalProbabilities(; ], ) -prob3 = IntervalProbabilities(; - lower = [0.0; 0.0; 1.0], - upper = [0.0; 0.0; 1.0] +prob3 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ], + upper = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ] ) transition_probs = [prob1, prob2, prob3] -initial_state = 1 +initial_state = [1] mdp = IntervalMarkovDecisionProcess(transition_probs, initial_state) +# output + +FactoredRobustMarkovDecisionProcess +├─ 1 state variables with cardinality: (3,) +├─ 1 action variables with cardinality: (2,) +├─ Initial states: [1] +├─ Transition marginals: +│ └─ Marginal 1: +│ ├─ Conditional variables: states = (1,), actions = (1,) +│ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +└─Inferred properties + ├─Model type: Interval MDP + ├─Number of states: 3 + ├─Number of actions: 2 + ├─Default model checking algorithm: Robust Value Iteration + └─Default Bellman operator algorithm: O-Maximization +``` + +```jldoctest 
robust_vi reach_states = [3] time_horizon = 10 prop = FiniteTimeReachability(reach_states, time_horizon) spec = Specification(prop, Pessimistic, Maximize) -### Verification +# output + +Specification +├─ Satisfaction mode: Pessimistic +├─ Strategy mode: Maximize +└─ Property: FiniteTimeReachability + ├─ Time horizon: 10 + └─ Reach states: CartesianIndex{1}[CartesianIndex(3,)] +``` + + +```jldoctest robust_vi +# Verification problem = VerificationProblem(mdp, spec) -sol = solve(problem, RobustValueIteration(); callback = (V, k) -> println("Iteration ", k)) +sol = solve(problem, RobustValueIteration(default_bellman_algorithm(mdp)); callback = (V, k) -> println("Iteration ", k)) V, k, res = sol # or `value_function(sol), num_iterations(sol), residual(sol)` +# output + +Iteration 1 +Iteration 2 +Iteration 3 +Iteration 4 +Iteration 5 +Iteration 6 +Iteration 7 +Iteration 8 +Iteration 9 +Iteration 10 +IntervalMDP.VerificationSolution{Float64, Vector{Float64}, Nothing}([0.9597716063999999, 0.9710050144, 1.0], [0.01593864639999998, 0.011487926399999848, -0.0], 10, nothing) + +``` + +```jldoctest robust_vi # Control synthesis problem = ControlSynthesisProblem(mdp, spec) -sol = solve(problem, RobustValueIteration(); callback = (V, k) -> println("Iteration ", k)) +sol = solve(problem, RobustValueIteration(default_bellman_algorithm(mdp)); callback = (V, k) -> println("Iteration ", k)) σ, V, k, res = sol # or `strategy(sol), value_function(sol), num_iterations(sol), residual(sol)` + +# output + +Iteration 1 +Iteration 2 +Iteration 3 +Iteration 4 +Iteration 5 +Iteration 6 +Iteration 7 +Iteration 8 +Iteration 9 +Iteration 10 +IntervalMDP.ControlSynthesisSolution{TimeVaryingStrategy{1, Vector{Tuple{Int32}}}, Float64, Vector{Float64}, Nothing}(TimeVaryingStrategy{1, Vector{Tuple{Int32}}}(Vector{Tuple{Int32}}[[(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), (1,)], [(1,), (2,), 
(1,)], [(1,), (2,), (1,)]]), [0.9597716063999999, 0.9710050144, 1.0], [0.01593864639999998, 0.011487926399999848, -0.0], 10, nothing) ``` """ function solve(problem::VerificationProblem, alg::RobustValueIteration; kwargs...) @@ -161,7 +232,7 @@ end function ValueFunction(problem::AbstractIntervalMDPProblem) mp = system(problem) - previous = arrayfactory(mp, valuetype(mp), state_variables(mp)) + previous = arrayfactory(mp, valuetype(mp), state_values(mp)) previous .= zero(valuetype(mp)) current = copy(previous) diff --git a/src/specification.jl b/src/specification.jl index 683e1a03..e5a8b760 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -392,7 +392,7 @@ function step_postprocess_value_function!(value_function, prop::AbstractReachAvo end function checkstatebounds(states, system::IntervalMarkovProcess) - pns = state_variables(system) + pns = state_values(system) for j in states j = Tuple(j) @@ -768,7 +768,7 @@ postprocess_value_function!(value_function, ::AbstractReward) = value_function function checkreward(prop::AbstractReward, system) checkdevice(reward(prop), system) - pns = state_variables(system) + pns = state_values(system) if size(reward(prop)) != pns throw( DimensionMismatch( diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index 391e0e6e..3f0f6b92 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -64,7 +64,7 @@ end function construct_strategy_cache(problem::ControlSynthesisProblem, time_varying::Val{true}) mp = system(problem) - N = length(action_variables(mp)) + N = length(action_values(mp)) cur_strategy = arrayfactory(mp, NTuple{N, Int32}, source_shape(mp)) cur_strategy .= (ntuple(_ -> 0, N),) return TimeVaryingStrategyCache(cur_strategy) @@ -99,7 +99,7 @@ function construct_strategy_cache( time_varying::Val{false}, ) mp = system(problem) - N = length(action_variables(mp)) + N = length(action_values(mp)) strategy = arrayfactory(mp, NTuple{N, Int32}, source_shape(mp)) strategy .= (ntuple(_ -> 0, N),) return 
StationaryStrategyCache(strategy) diff --git a/src/workspace.jl b/src/workspace.jl index e8790cba..ad8f72a1 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -19,7 +19,7 @@ end function construct_workspace(proc::ProductProcess, alg=default_bellman_algorithm(proc); kwargs...) mp = markov_process(proc) underlying_workspace = construct_workspace(mp, alg; kwargs...) - intermediate_values = arrayfactory(mp, valuetype(mp), state_variables(mp)) + intermediate_values = arrayfactory(mp, valuetype(mp), state_values(mp)) return ProductWorkspace(underlying_workspace, intermediate_values) end @@ -239,7 +239,7 @@ function FactoredVertexIteratorWorkspace(sys::FactoredRMDP) N = length(marginals(sys)) R = valuetype(sys) - result_vectors = ntuple(r -> Vector{R}(undef, state_variables(sys, r)), N) + result_vectors = ntuple(r -> Vector{R}(undef, state_values(sys, r)), N) actions = Array{valuetype(sys)}(undef, action_shape(sys)) return FactoredVertexIteratorWorkspace(result_vectors, actions) diff --git a/test/data/bmdp_tool.jl b/test/data/bmdp_tool.jl index 2c043b23..0a0ac3a3 100644 --- a/test/data/bmdp_tool.jl +++ b/test/data/bmdp_tool.jl @@ -27,8 +27,8 @@ as = ambiguity_sets(marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @test num_target(marginal) == num_target(new_marginal) - @test state_variables(mdp) == state_variables(new_mdp) - @test action_variables(mdp) == action_variables(new_mdp) + @test state_values(mdp) == state_values(new_mdp) + @test action_values(mdp) == action_values(new_mdp) @test as.lower ≈ new_as.lower @test as.gap ≈ new_as.gap @@ -60,8 +60,8 @@ end @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @test num_target(marginal) == num_target(new_marginal) - @test state_variables(mdp) == state_variables(new_mdp) - @test action_variables(mdp) == action_variables(new_mdp) + @test state_values(mdp) == 
state_values(new_mdp) + @test action_values(mdp) == action_values(new_mdp) @test as.lower ≈ new_as.lower @test as.gap ≈ new_as.gap diff --git a/test/data/intervalmdp.jl b/test/data/intervalmdp.jl index c97ba1aa..c4db48fe 100644 --- a/test/data/intervalmdp.jl +++ b/test/data/intervalmdp.jl @@ -27,8 +27,8 @@ write_intervalmdp_jl_model("data/multiObj_robotIMDP.nc", mdp) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @test num_target(marginal) == num_target(new_marginal) - @test state_variables(mdp) == state_variables(new_mdp) - @test action_variables(mdp) == action_variables(new_mdp) + @test state_values(mdp) == state_values(new_mdp) + @test action_values(mdp) == action_values(new_mdp) @test as.lower ≈ new_as.lower @test as.gap ≈ new_as.gap diff --git a/test/data/prism.jl b/test/data/prism.jl index 19657442..50a4face 100644 --- a/test/data/prism.jl +++ b/test/data/prism.jl @@ -44,8 +44,8 @@ new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @test num_target(marginal) == num_target(new_marginal) -@test state_variables(mdp) == state_variables(new_mdp) -@test action_variables(mdp) == action_variables(new_mdp) +@test state_values(mdp) == state_values(new_mdp) +@test action_values(mdp) == action_values(new_mdp) @test as.lower ≈ new_as.lower @test as.gap ≈ new_as.gap @@ -105,8 +105,8 @@ new_as = ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @test num_target(marginal) == num_target(new_marginal) -@test state_variables(mdp) == state_variables(new_mdp) -@test action_variables(mdp) == action_variables(new_mdp) +@test state_values(mdp) == state_values(new_mdp) +@test action_values(mdp) == action_values(new_mdp) @test as.lower ≈ new_as.lower @test as.gap ≈ new_as.gap @@ -162,8 +162,8 @@ new_as = 
ambiguity_sets(new_marginal) @test source_shape(marginal) == source_shape(new_marginal) @test action_shape(marginal) == action_shape(new_marginal) @test num_target(marginal) == num_target(new_marginal) -@test state_variables(mdp) == state_variables(new_mdp) -@test action_variables(mdp) == action_variables(new_mdp) +@test state_values(mdp) == state_values(new_mdp) +@test action_values(mdp) == action_values(new_mdp) @test as.lower ≈ new_as.lower @test as.gap ≈ new_as.gap From 5a5a275aa9e8f24d822fd143eaed4968ad534333 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 27 Sep 2025 20:57:16 +0200 Subject: [PATCH 44/71] Populate docstring for FactoredRobustMarkovDecisionProcess --- docs/src/reference/specifications.md | 24 ++-- .../FactoredRobustMarkovDecisionProcess.jl | 109 ++++++++++++++++-- src/probabilities/IntervalAmbiguitySets.jl | 5 +- src/robust_value_iteration.jl | 3 +- 4 files changed, 117 insertions(+), 24 deletions(-) diff --git a/docs/src/reference/specifications.md b/docs/src/reference/specifications.md index 1eb2504c..e03cc35d 100644 --- a/docs/src/reference/specifications.md +++ b/docs/src/reference/specifications.md @@ -14,18 +14,6 @@ strategy_mode StrategyMode ``` -## DFA Reachability - -```@docs -FiniteTimeDFAReachability -reach(prop::FiniteTimeDFAReachability) -time_horizon(prop::FiniteTimeDFAReachability) - -InfiniteTimeDFAReachability -reach(prop::InfiniteTimeDFAReachability) -convergence_eps(prop::InfiniteTimeDFAReachability) -``` - ## Reachability ```@docs @@ -93,4 +81,16 @@ convergence_eps(prop::InfiniteTimeReward) ExpectedExitTime avoid(prop::ExpectedExitTime) convergence_eps(prop::ExpectedExitTime) +``` + +## DFA Reachability + +```@docs +FiniteTimeDFAReachability +reach(prop::FiniteTimeDFAReachability) +time_horizon(prop::FiniteTimeDFAReachability) + +InfiniteTimeDFAReachability +reach(prop::InfiniteTimeDFAReachability) +convergence_eps(prop::InfiniteTimeDFAReachability) ``` \ No newline at end of file diff --git 
a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index cc1f410c..b0b51131 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -1,13 +1,104 @@ """ - FactoredRobustMarkovDecisionProcess - -!!! todo - Add intuitive description and formal definition - -!!! todo - Add fields reference and relation to definition - -!!! todo + FactoredRobustMarkovDecisionProcess{N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} <: IntervalMarkovProcess + +Factored Robust Markov Decision Processes (fRMDPs) [schnitzer2025efficient, delgado2011efficient](@cite) are +an extension of Robust Markov Decision Processes (RMDPs) [nilim2005robust, wiesemann2013robust, suilen2024robust](@cite) +that incorporate a factored representation of the state and action spaces, i.e. with state and action variables. + +Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \\mathcal{G}, \\Gamma)``, where + +- ``S = S_1 \\times \\cdots \\times S_n`` is a finite set of joint states with ``S_i`` + being a finite set of states for the ``i``-th state variable, +- ``S_0 \\subseteq S`` is a set of initial states, +- ``A = A_1 \\times \\cdots \\times A_m`` is a finite set of joint actions with ``A_j`` + being a finite set of actions for the ``j``-th action variable, +- ``\\mathcal{G} = (\\mathcal{V}, \\mathcal{E})`` is a directed bipartite graph with nodes + ``\\mathcal{V} = \\mathcal{V}_{ind} \\cup \\mathcal{V}_{cond} = \\{S_1, \\ldots, S_n, A_1, \\ldots, A_m\\} \\cup \\{S'_1, \\ldots, S'_n\\}`` + representing the state and action variables and their next-state counterparts, and edges + ``\\mathcal{E} \\subseteq \\mathcal{V}_{ind} \\times \\mathcal{V}_{cond}`` + representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, +- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s\\in S,a \\in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, + where each ``\\Gamma_{s,a} = \\bigotimes_{i=1}^n 
\\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)}`` is + a product of ambiguity sets ``\\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)}`` along each marginal ``i`` conditional + on the values in ``(s, a)`` of the parent variables ``\\text{Pa}_\\mathcal{G}(S'_i)`` of ``S'_i`` in ``\\mathcal{G}``, i.e. +```math + \\Gamma_{s,a} = \\left\\{ \\gamma \\in \\mathcal{D}(S) \\,:\\, \\gamma(t) = \\prod_{i=1}^n \\gamma^i(t_i | s_{\\text{Pa}_{\\mathcal{G}_S}(S'_i)}, a_{\\text{Pa}_{\\mathcal{G}_A}(S'_i)}), \\, \\gamma^i(\\cdot | s_{\\text{Pa}_{\\mathcal{G}_S}(S'_i)}, a_{\\text{Pa}_{\\mathcal{G}_A}(S'_i)}) \\in \\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i)} \\right\\}. +``` + +For a given source-action pair ``(s, a) \\in S \\times A``, any distribution ``\\gamma_{s, a} \\in \\Gamma_{s,a}`` is called a feasible distribution, +and feasible transitions are triplets ``(s, a, t) \\in S \\times A \\times S`` where ``t \\in \\mathop{supp}(\\gamma_{s, a})`` for any feasible distribution ``\\gamma_{s, a} \\in \\Gamma_{s, a}``. + +### Type parameters +- `N` is the number of state variables. +- `M` is the number of action variables. +- `P <: NTuple{N, Marginal}` is a tuple type with a (potentially different) type for each marginal. +- `VI <: InitialStates` is the type of initial states. + +### Fields +- `state_vars::NTuple{N, Int32}`: the number of values ``|S_i|`` for each state variable ``S_i`` as a tuple. +- `action_vars::NTuple{M, Int32}`: the number of values ``|A_k|`` for each action variable ``A_k`` as a tuple. +- `source_dims::NTuple{N, Int32}`: for systems with terminal states along certain slices, it is possible to avoid + specifying them by using `source_dims` less than `state_vars`; this is useful e.g. in building abstractions. + The terminal states must be the last value for the slice dimension. If not supplied, it is assumed `source_dims == state_vars`. +- `transition::P` is the marginal ambiguity sets. 
For a given source-action pair ``(s, a) \\in S \\times A``, + any [`Marginal`](@ref) element of `transition` subselects `s` and `a` corresponding to its [`state_variables`](@ref) + and [`action_variables`](@ref), i.e. it encodes the operation ``\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)``. + The underlying `ambiguity_sets` object on `Marginal` encodes ``\\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)}`` + for all values of ``\\text{Pa}_\\mathcal{G}(S'_i)``. See [`Marginal`](@ref) for details about the layout of the underlying + `AbstractAmbiguitySets` object. +- `initial_states::VI`: stores a representation of `S_0`. If no set of initial_states is given, then it is simply assigned + the zero-byte object `AllStates()`, which represents that all states are potential initial states. It is not used within + the value iteration. + +### Example +```jldoctest +using IntervalMDP # hide + +state_vars = (2, 3) +action_vars = (1, 2) + +state_indices = (1, 2) +action_indices = (1,) +state_dims = (2, 3) +action_dims = (1,) +marginal1 = Marginal(IntervalAmbiguitySets(; + # 6 ambiguity sets = 2 * 3 source states, 1 action + # Column layout: (a¹₁, s¹₁, s²₁), (a¹₁, s¹₂, s²₁), (a¹₁, s¹₁, s²₂), (a¹₁, s¹₂, s²₂), (a¹₁, s¹₁, s²₃), (a¹₁, s¹₂, s²₃) + # Equivalent to CartesianIndices(actions_dims..., state_dims...), i.e. actions first, then states in lexicographic order + lower = [ + 1/15 7/30 1/15 13/30 4/15 1/6 + 2/5 7/30 1/30 11/30 2/15 1/10 + ], + upper = [ + 17/30 7/10 2/3 4/5 7/10 2/3 + 9/10 13/15 9/10 5/6 4/5 14/15 + ] +), state_indices, action_indices, state_dims, action_dims) + +state_indices = (2,) +action_indices = (2,) +state_dims = (3,) +action_dims = (2,) +marginal2 = Marginal(IntervalAmbiguitySets(; + # 6 ambiguity sets = 3 source states, 2 actions + # Column layout: (a²₁, s²₁), (a²₂, s²₁), (a²₁, s²₂), (a²₂, s²₂), (a²₁, s²₃), (a²₂, s²₃) + # Equivalent to CartesianIndices(actions_dims..., state_dims...), i.e. 
actions first, then states in lexicographic order + lower = [ + 1/30 1/3 1/6 1/15 2/5 2/15 + 4/15 1/4 1/6 1/30 2/15 1/30 + 2/15 7/30 1/10 7/30 7/15 1/5 + ], + upper = [ + 2/3 7/15 4/5 11/30 19/30 1/2 + 23/30 4/5 23/30 3/5 7/10 8/15 + 7/15 4/5 23/30 7/10 7/15 23/30 + ] +), state_indices, action_indices, state_dims, action_dims) + +initial_states = [(1, 1)] # Initial states are optional +mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2), initial_states) +``` + !!! todo Add example """ diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 3d135b6a..c116e4bc 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -22,7 +22,8 @@ because it simplifies repeated probability assignment using O-maximization [giva ### Examples ```jldoctest -using IntervalMDP, StyledStrings # hide +using IntervalMDP + dense_prob = IntervalAmbiguitySets(; lower = [0.0 0.5; 0.1 0.3; 0.2 0.1], upper = [0.5 0.7; 0.6 0.5; 0.7 0.3], @@ -37,7 +38,7 @@ IntervalAmbiguitySets ``` ```jldoctest -using IntervalMDP, StyledStrings, SparseArrays # hide +using IntervalMDP, SparseArrays sparse_prob = IntervalAmbiguitySets(; lower = sparse_hcat( SparseVector(15, [4, 10], [0.1, 0.2]), diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index b7dfdae9..3a2ebeb1 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -31,7 +31,8 @@ iteration count. 
The callback function should have the signature `callback(V::Ab ### Examples ```jldoctest robust_vi -using IntervalMDP # hide +using IntervalMDP + prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 From cd10ad8dc411f12e392cb207022ae6e2827d4289 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 27 Sep 2025 20:58:23 +0200 Subject: [PATCH 45/71] Add doctest output --- .../FactoredRobustMarkovDecisionProcess.jl | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index b0b51131..fa68c16b 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -52,7 +52,7 @@ and feasible transitions are triplets ``(s, a, t) \\in S \\times A \\times S`` w ### Example ```jldoctest -using IntervalMDP # hide +using IntervalMDP state_vars = (2, 3) action_vars = (1, 2) @@ -97,10 +97,27 @@ marginal2 = Marginal(IntervalAmbiguitySets(; initial_states = [(1, 1)] # Initial states are optional mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2), initial_states) -``` - !!! 
todo - Add example +# output + +FactoredRobustMarkovDecisionProcess +├─ 2 state variables with cardinality: (2, 3) +├─ 2 action variables with cardinality: (1, 2) +├─ Initial states: [(1, 1)] +├─ Transition marginals: +│ ├─ Marginal 1: +│ │ ├─ Conditional variables: states = (1, 2), actions = (1,) +│ │ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +│ └─ Marginal 2: +│ ├─ Conditional variables: states = (2,), actions = (2,) +│ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +└─Inferred properties + ├─Model type: Factored Interval MDP + ├─Number of states: 6 + ├─Number of actions: 2 + ├─Default model checking algorithm: Robust Value Iteration + └─Default Bellman operator algorithm: Binary tree LP McCormick Relaxation +``` """ struct FactoredRobustMarkovDecisionProcess{ N, From ab9e89e70fba7520f842b1fcee6082c567734344 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 27 Sep 2025 21:16:49 +0200 Subject: [PATCH 46/71] Populate docstrings for IMCs and IMDPs --- docs/src/models.md | 18 +-- .../FactoredRobustMarkovDecisionProcess.jl | 8 +- src/models/IntervalMarkovChain.jl | 49 ++++++- src/models/IntervalMarkovDecisionProcess.jl | 122 +++++++++++++++++- src/probabilities/IntervalAmbiguitySets.jl | 11 +- 5 files changed, 178 insertions(+), 30 deletions(-) diff --git a/docs/src/models.md b/docs/src/models.md index e74555c9..3ff112db 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -3,7 +3,7 @@ We denote the natural numbers by ``\mathbb{N}`` and ``\mathbb{N}_0 = \mathbb{N} \cup \{0\}``. A probability distribution ``\gamma`` over a finite set ``S`` is a function ``\gamma : S \to [0, 1]`` satisfying ``\sum_{s \in S} \gamma(s) = 1``. The support of the distribution ``\mathop{supp}(\gamma)`` is defined as ``\mathop{supp}(\gamma) = \{ s \in S : \gamma(s) > 0\}``. We denote by ``\mathcal{D}(S)`` the set of all probability distributions over ``S``. 
For ``\underline{\gamma}, \overline{\gamma} : S \to [0, 1]`` such that ``\underline{\gamma}(s) \leq \overline{\gamma}(s)`` for each ``s \in S`` and ``\sum_{s \in S} \underline{\gamma}(s) \leq 1 \leq \sum_{s \in S} \overline{\gamma}(s)``, an interval ambiguity set ``\Gamma \subset \mathcal{D}(S)`` is the set of distributions such that ```math - \Gamma = \{ \gamma \in \mathcal{D}(S) \,:\, \underline{\gamma}(s) \leq \gamma(s) \leq \overline{\gamma}(s) \text{ for each } s\in S \}. + \Gamma = \{ \gamma \in \mathcal{D}(S) \,:\, \underline{\gamma}(s) \leq \gamma(s) \leq \overline{\gamma}(s) \text{ for each } s \in S \}. ``` ``\underline{\gamma}, \overline{\gamma}`` are referred to as the interval bounds of the interval ambiguity set. For ``n`` finite sets ``S_1, \ldots, S_n`` we denote by ``S_1 \times \cdots \times S_n`` their Cartesian product. Given ``S = S_1 \times \cdots \times S_n`` and ``n`` ambiguity sets ``\Gamma_i \in \mathcal{D}(S_i)``, ``i = 1, \ldots, n``, the product ambiguity set ``\Gamma \subseteq \mathcal{D}(S)`` is defined as: @@ -21,12 +21,12 @@ Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \mathcal{G}, \Gamma)``, whe - ``S_0 \subseteq S`` is a set of initial states, - ``A = A_1 \times \cdots \times A_m`` is a finite set of joint actions with ``A_j`` being a finite set of actions for the ``j``-th action variable, - ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \mathcal{V}_{ind} \cup \mathcal{V}_{cond} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V}_{ind} \times \mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, -- ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, 
a)}`` is a product of ambiguity sets ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` along each marginal ``i`` conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``, i.e. +- ``\Gamma = \{\Gamma_{s, a}\}_{s \in S, a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s, a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is a product of ambiguity sets ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` along each marginal ``i`` conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``, i.e. ```math - \Gamma_{s,a} = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(t) = \prod_{i=1}^n \gamma^i(t_i | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}), \, \gamma^i(\cdot | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}) \in \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i)} \right\}. + \Gamma_{s, a} = \left\{ \gamma \in \mathcal{D}(S) \,:\, \gamma(t) = \prod_{i=1}^n \gamma^i(t_i | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}), \, \gamma^i(\cdot | s_{\text{Pa}_{\mathcal{G}_S}(S'_i)}, a_{\text{Pa}_{\mathcal{G}_A}(S'_i)}) \in \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i)} \right\}. ``` -For a given source-action pair ``(s, a) \in S \times A``, any distribution ``\gamma_{s, a} \in \Gamma_{s,a}`` is called a feasible distribution, and feasible transitions are triplets ``(s, a, t) \in S \times A \times S`` where ``t \in \mathop{supp}(\gamma_{s, a})`` for any feasible distribution ``\gamma_{s, a} \in \Gamma_{s, a}``. A path of an fRMDP is a sequence of states and actions ``\omega = s[0], a[0], s[1], a[1], \dots`` where ``s[k] \in S`` and ``a[k] \in A`` for all ``k \in \mathbb{N}_0``, and ``(s[k], a[k], s[k + 1])`` is a feasible transition for all ``k \in \mathbb{N}_0``. 
We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. +For a given source-action pair ``(s, a) \in S \times A``, any distribution ``\gamma_{s, a} \in \Gamma_{s, a}`` is called a feasible distribution, and feasible transitions are triplets ``(s, a, t) \in S \times A \times S`` where ``t \in \mathop{supp}(\gamma_{s, a})`` for any feasible distribution ``\gamma_{s, a} \in \Gamma_{s, a}``. A path of an fRMDP is a sequence of states and actions ``\omega = s[0], a[0], s[1], a[1], \dots`` where ``s[k] \in S`` and ``a[k] \in A`` for all ``k \in \mathbb{N}_0``, and ``(s[k], a[k], s[k + 1])`` is a feasible transition for all ``k \in \mathbb{N}_0``. We denote by ``\omega[k] = s[k]`` the state of the path at time ``k \in \mathbb{N}_0`` and by ``\Omega`` and ``\Omega_{fin}`` the set of all infinite and finite paths, respectively. A _strategy_ or _policy_ for an fRMDP is a function ``\pi : \Omega_{fin} \to A`` that assigns an action, given a (finite) path called the history. _Time-dependent_ Markov strategies are functions from state and time step to an action, i.e. ``\pi : S \times \mathbb{N}_0 \to A``. This can equivalently be described as a sequence of functions indexed by time ``\mathbf{\pi} = (\pi[0], \pi[1], \ldots)``. If ``\pi`` does not depend on time and solely depends on the current state, it is called a _stationary_ strategy. Similar to a strategy, an adversary ``\eta`` is a function that assigns a feasible distribution to a given state. The focus of this package is on dynamic uncertainties where the choice of the adversary is resolved at every time step, called dynamic uncertainty, and where the adversary has access to both the current state and action, called ``(s, a)``-rectangularity. 
We refer to [suilen2024robust](@cite) for further details on the distinction between static and dynamic uncertainties, types of rectangularity, and their implications. Given a strategy and an adversary, an fRMDP collapses to a finite (factored) Markov chain. @@ -88,7 +88,7 @@ Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where - ``S`` is a finite set of states, - ``S_0 \subseteq S`` is a set of initial states, -- ``\Gamma = \{\Gamma_{s}\}_{s\in S}`` is a set of ambiguity sets for source state ``s``, where each ``\Gamma_{s}`` is an interval ambiguity set over ``S``. +- ``\Gamma = \{\Gamma_{s}\}_{s \in S}`` is a set of ambiguity sets for source state ``s``, where each ``\Gamma_{s}`` is an interval ambiguity set over ``S``. An IMC is equivalent to an fRMDP where there is only one state variable, no action variables, and the ambiguity sets are interval ambiguity sets. The dependency graph is just two nodes ``S`` and ``S'`` with a single edge from the former to the latter. Paths and adversaries are defined similarly to fRMDPs. @@ -120,7 +120,7 @@ Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \Gamma)``, where - ``S`` is a finite set of states, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, -- ```\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a}`` is an interval ambiguity set over ``S``. +- ```\Gamma = \{\Gamma_{s, a}\}_{s \in S, a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s, a}`` is an interval ambiguity set over ``S``. An IMDP is equivalent to an fRMDP where there is only one state variable, one action variable, and the ambiguity sets are interval ambiguity sets. The dependency graph is three nodes ``S``, ``A``, and ``S'`` with two edges ``S \rightarrow S'`` and ``A \rightarrow S'``. Paths and adversaries are defined similarly to fRMDPs. 
@@ -190,7 +190,7 @@ mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) It is possible to skip defining actions when the transition is a guaranteed self-loop and is the last states in the ambiguity set. This is useful for defining target states in reachability problems. The example below has 3 states (as shown by the 3 rows) and 2 actions -(explictly defined by `num_actions = 2`). The last state is a target state with a guaranteed self-loop, i.e., the transition probabilities are ``P(3|3,a) = 1`` for both actions ``a \in \{1, 2\}``. +(explictly defined by `num_actions = 2`). The last state is a target state with a guaranteed self-loop, i.e., the transition probabilities are ``P(3 | 3, a) = 1`` for both actions ``a \in \{1, 2\}``. ```@example using IntervalMDP # hide @@ -219,7 +219,7 @@ Formally, an odIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \Gam - ``S = S_1 \times \cdots \times S_n`` is a finite set of joint states with ``S_i`` being a finite set of states for the ``i``-th marginal, - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, -- ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{s,a}`` with ``\Gamma^i_{s,a}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``. +- ``\Gamma = \{\Gamma_{s, a}\}_{s \in S, a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s, a} = \bigotimes_{i=1}^n \Gamma^i_{s, a}`` with ``\Gamma^i_{s, a}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``. An odIMDP is equivalent to an fRMDP where the dependency graph is ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` with ``\mathcal{V} = \{S_1, \ldots, S_n, A\} \cup \{S'_1, \ldots, S'_n\}`` and ``\mathcal{E} = \{(S_i, S'_j) : i, j = 1, \ldots, n\} \cup \{(A_i, S'_j) : j = 1, \ldots, m, i = 1, \ldots, n\}``. 
In other words, each next-state variable ``S'_i`` depends on all state and action variables and the dependency graph is a complete bipartite graph. Paths, strategies, and adversaries are defined similarly to fRMDPs. @@ -231,7 +231,7 @@ Formally, an fIMDP ``M`` with ``n`` marginals is a tuple ``M = (S, S_0, A, \math - ``S_0 \subseteq S`` is a set of initial states, - ``A`` is a finite set of actions, - ``\mathcal{G} = (\mathcal{V}, \mathcal{E})`` is a directed bipartite graph with nodes ``\mathcal{V} = \mathcal{V}_{ind} \cup \mathcal{V}_{cond} = \{S_1, \ldots, S_n, A_1, \ldots, A_m\} \cup \{S'_1, \ldots, S'_n\}`` representing the state and action variables and their next-state counterparts, and edges ``\mathcal{E} \subseteq \mathcal{V}_{ind} \times \mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, -- ``\Gamma = \{\Gamma_{s,a}\}_{s\in S,a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s,a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` with ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``, conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``. +- ``\Gamma = \{\Gamma_{s, a}\}_{s \in S, a \in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\Gamma_{s, a} = \bigotimes_{i=1}^n \Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` with ``\Gamma^i_{\text{Pa}_\mathcal{G}(S'_i) \cap (s, a)}`` is an interval ambiguity set over the ``i``-th marginal, i.e. over ``S_i``, conditional on the values in ``(s, a)`` of the parent variables ``\text{Pa}_\mathcal{G}(S'_i)`` of ``S'_i`` in ``\mathcal{G}``. The example in [Factored RMDPs](@ref) is also an example of an fIMDP. 
diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index fa68c16b..b8c910af 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -17,15 +17,15 @@ Formally, a fRMDP ``M`` is a tuple ``M = (S, S_0, A, \\mathcal{G}, \\Gamma)``, w representing the state and action variables and their next-state counterparts, and edges ``\\mathcal{E} \\subseteq \\mathcal{V}_{ind} \\times \\mathcal{V}_{cond}`` representing dependencies of ``S'_i`` on ``S_j`` and ``A_k``, -- ``\\Gamma = \\{\\Gamma_{s,a}\\}_{s\\in S,a \\in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, - where each ``\\Gamma_{s,a} = \\bigotimes_{i=1}^n \\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)}`` is +- ``\\Gamma = \\{\\Gamma_{s, a}\\}_{s \\in S, a \\in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, + where each ``\\Gamma_{s, a} = \\bigotimes_{i=1}^n \\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)}`` is a product of ambiguity sets ``\\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i) \\cap (s, a)}`` along each marginal ``i`` conditional on the values in ``(s, a)`` of the parent variables ``\\text{Pa}_\\mathcal{G}(S'_i)`` of ``S'_i`` in ``\\mathcal{G}``, i.e. ```math - \\Gamma_{s,a} = \\left\\{ \\gamma \\in \\mathcal{D}(S) \\,:\\, \\gamma(t) = \\prod_{i=1}^n \\gamma^i(t_i | s_{\\text{Pa}_{\\mathcal{G}_S}(S'_i)}, a_{\\text{Pa}_{\\mathcal{G}_A}(S'_i)}), \\, \\gamma^i(\\cdot | s_{\\text{Pa}_{\\mathcal{G}_S}(S'_i)}, a_{\\text{Pa}_{\\mathcal{G}_A}(S'_i)}) \\in \\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i)} \\right\\}. 
+ \\Gamma_{s, a} = \\left\\{ \\gamma \\in \\mathcal{D}(S) \\,:\\, \\gamma(t) = \\prod_{i=1}^n \\gamma^i(t_i | s_{\\text{Pa}_{\\mathcal{G}_S}(S'_i)}, a_{\\text{Pa}_{\\mathcal{G}_A}(S'_i)}), \\, \\gamma^i(\\cdot | s_{\\text{Pa}_{\\mathcal{G}_S}(S'_i)}, a_{\\text{Pa}_{\\mathcal{G}_A}(S'_i)}) \\in \\Gamma^i_{\\text{Pa}_\\mathcal{G}(S'_i)} \\right\\}. ``` -For a given source-action pair ``(s, a) \\in S \\times A``, any distribution ``\\gamma_{s, a} \\in \\Gamma_{s,a}`` is called a feasible distribution, +For a given source-action pair ``(s, a) \\in S \\times A``, any distribution ``\\gamma_{s, a} \\in \\Gamma_{s, a}`` is called a feasible distribution, and feasible transitions are triplets ``(s, a, t) \\in S \\times A \\times S`` where ``t \\in \\mathop{supp}(\\gamma_{s, a})`` for any feasible distribution ``\\gamma_{s, a} \\in \\Gamma_{s, a}``. ### Type parameters diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl index 6f9978f0..8bc803be 100644 --- a/src/models/IntervalMarkovChain.jl +++ b/src/models/IntervalMarkovChain.jl @@ -21,11 +21,54 @@ end IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov chain -from a single [`IntervalAmbiguitySets`](@ref) object. See [IMCs](@ref) for the formal definition. +from a single [`IntervalAmbiguitySets`](@ref) object, as IMCs are a subclass of fRMDPs. -!!! todo - Add example +Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \\Gamma)``, where +- ``S`` is a finite set of states, +- ``S_0 \\subseteq S`` is a set of initial states, +- ``\\Gamma = \\{\\Gamma_{s}\\}_{s \\in S}`` is a set of ambiguity sets for source state ``s``, + where each ``\\Gamma_{s}`` is an _interval_ ambiguity set over ``S``. + +Notice also that an IMC is an [`IntervalMarkovDecisionProcess`](@ref) with a single action. 
+ +### Example +```jldoctest +using IntervalMDP + +prob = IntervalAmbiguitySets(; + lower = [ + 0 1/2 0 + 1/10 3/10 0 + 1/5 1/10 1 + ], + upper = [ + 1/2 7/10 0 + 3/5 1/2 0 + 7/10 3/10 1 + ], +) + +initial_states = [1] +mc = IntervalMarkovChain(prob, initial_states) + +# output + +FactoredRobustMarkovDecisionProcess +├─ 1 state variables with cardinality: (3,) +├─ 1 action variables with cardinality: (1,) +├─ Initial states: [1] +├─ Transition marginals: +│ └─ Marginal 1: +│ ├─ Conditional variables: states = (1,), actions = (1,) +│ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +└─Inferred properties + ├─Model type: Interval MDP + ├─Number of states: 3 + ├─Number of actions: 1 + ├─Default model checking algorithm: Robust Value Iteration + └─Default Bellman operator algorithm: O-Maximization +``` """ function IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) source_dims = (num_sets(ambiguity_set),) diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index 4908e82f..3a71c139 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -1,3 +1,15 @@ +""" +Convenience constructors for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process, +as IMDPs are a subclass of fRMDPs. +Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \\Gamma)``, where + +- ``S`` is a finite set of states, +- ``S_0 \\subseteq S`` is a set of initial states, +- ``A`` is a finite set of actions, +- ```\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S,a \\in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\\Gamma_{s,a}`` is an interval ambiguity set over ``S``. 
+""" +function IntervalMarkovDecisionProcess end + function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySets}, initial_states::InitialStates = AllStates()) state_vars = (Int32(num_target(marginal)),) action_vars = action_shape(marginal) @@ -17,10 +29,71 @@ end IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process -from a single [`IntervalAmbiguitySets`](@ref) object and a specified number of actions. See [IMDPs](@ref) for the formal definition. - -!!! todo - Add example +from a single [`IntervalAmbiguitySets`](@ref) object and a specified number of actions. + +### Example +```jldoctest +using IntervalMDP + +prob1 = IntervalAmbiguitySets(; + lower = [ + 0 1/2 + 1/10 3/10 + 1/5 1/10 + ], + upper = [ + 1/2 7/10 + 3/5 1/2 + 7/10 3/10 + ], +) + +prob2 = IntervalAmbiguitySets(; + lower = [ + 1/10 1/5 + 1/5 3/10 + 3/10 2/5 + ], + upper = [ + 3/5 3/5 + 1/2 1/2 + 2/5 2/5 + ], +) + +prob3 = IntervalAmbiguitySets(; + lower = Float64[ + 0 0 + 0 0 + 1 1 + ], + upper = Float64[ + 0 0 + 0 0 + 1 1 + ] +) + +initial_states = [1] +mdp = IntervalMarkovDecisionProcess([prob1, prob2, prob3], initial_states) + +# output + +FactoredRobustMarkovDecisionProcess +├─ 1 state variables with cardinality: (3,) +├─ 1 action variables with cardinality: (2,) +├─ Initial states: [1] +├─ Transition marginals: +│ └─ Marginal 1: +│ ├─ Conditional variables: states = (1,), actions = (1,) +│ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +└─Inferred properties + ├─Model type: Interval MDP + ├─Number of states: 3 + ├─Number of actions: 2 + ├─Default model checking algorithm: Robust Value Iteration + └─Default Bellman operator algorithm: O-Maximization +``` """ function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, 
initial_states::InitialStates = AllStates()) @@ -40,10 +113,45 @@ end A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process from a vector of [`IntervalAmbiguitySets`](@ref) objects, one for each state and with the same number of actions in each. -See [IMDPs](@ref) for the formal definition. -!!! todo - Add example +### Example +```jldoctest +using IntervalMDP + +prob = IntervalAmbiguitySets(; + lower = [ + 0 1/2 1/10 1/5 0 0 + 1/10 3/10 1/5 3/10 0 0 + 1/5 1/10 3/10 2/5 1 1 + ], + upper = [ + 1/2 7/10 3/5 2/5 0 0 + 3/5 1/2 1/2 2/5 0 0 + 7/10 3/10 2/5 2/5 1 1 + ], +) + +num_actions = 2 +initial_states = [1] +mdp = IntervalMarkovDecisionProcess(prob, num_actions, initial_states) + +# output + +FactoredRobustMarkovDecisionProcess +├─ 1 state variables with cardinality: (3,) +├─ 1 action variables with cardinality: (2,) +├─ Initial states: [1] +├─ Transition marginals: +│ └─ Marginal 1: +│ ├─ Conditional variables: states = (1,), actions = (1,) +│ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +└─Inferred properties + ├─Model type: Interval MDP + ├─Number of states: 3 + ├─Number of actions: 2 + ├─Default model checking algorithm: Robust Value Iteration + └─Default Bellman operator algorithm: O-Maximization +``` """ function IntervalMarkovDecisionProcess( diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index c116e4bc..a39d14bc 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -1,11 +1,8 @@ """ IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} -!!! todo - Update description - -A matrix pair to represent the lower and upper bound of `num_sets(ambiguity_set)` interval ambiguity sets (on the columns) -to `num_target(ambiguity_set)` destinations (on the rows). [Marginal](@ref) adds interpretation to the column indices. 
+A matrix pair to represent the lower and upper bound of `num_sets(ambiguity_sets)` interval ambiguity sets (on the columns) +to `num_target(ambiguity_sets)` destinations (on the rows). [Marginal](@ref) adds interpretation to the column indices. The matrices can be `Matrix{R}` or `SparseMatrixCSC{R}`, or their CUDA equivalents. Due to the space complexity, if modelling [IntervalMarkovChains](@ref IntervalMarkovChain) or [IntervalMarkovDecisionProcesses](@ref IntervalMarkovDecisionProcess), it is recommended to use sparse matrices. @@ -17,8 +14,8 @@ The lower bound is explicitly stored, while the upper bound is computed from the because it simplifies repeated probability assignment using O-maximization [givan2000bounded, lahijanian2015formal](@cite). ### Fields -- `lower::MR`: The lower bound probabilities for `num_sets(ambiguity_set)` ambiguity sets to `num_target(ambiguity_set)` target states. -- `gap::MR`: The gap between upper and lower bound transition probabilities for `num_sets(ambiguity_set)` ambiguity sets to `num_target(ambiguity_set)` target states. +- `lower::MR`: The lower bound probabilities for `num_sets(ambiguity_sets)` ambiguity sets to `num_target(ambiguity_sets)` target states. +- `gap::MR`: The gap between upper and lower bound transition probabilities for `num_sets(ambiguity_sets)` ambiguity sets to `num_target(ambiguity_sets)` target states. 
### Examples ```jldoctest From 3c8f57a74a50a141428de67a1294a51f20f85921 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 27 Sep 2025 21:26:53 +0200 Subject: [PATCH 47/71] Improve docstrings for IMCs and IMDPs --- src/models/IntervalMarkovChain.jl | 4 ++-- src/models/IntervalMarkovDecisionProcess.jl | 23 ++++++++------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl index 8bc803be..8a1b98e7 100644 --- a/src/models/IntervalMarkovChain.jl +++ b/src/models/IntervalMarkovChain.jl @@ -20,8 +20,8 @@ end """ IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) -A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov chain -from a single [`IntervalAmbiguitySets`](@ref) object, as IMCs are a subclass of fRMDPs. +A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov chain, +as IMCs are a subclass of fRMDPs, from a single [`IntervalAmbiguitySets`](@ref) object. Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \\Gamma)``, where diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index 3a71c139..1e638e10 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -1,15 +1,3 @@ -""" -Convenience constructors for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process, -as IMDPs are a subclass of fRMDPs. -Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \\Gamma)``, where - -- ``S`` is a finite set of states, -- ``S_0 \\subseteq S`` is a set of initial states, -- ``A`` is a finite set of actions, -- ```\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S,a \\in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\\Gamma_{s,a}`` is an interval ambiguity set over ``S``. 
-""" -function IntervalMarkovDecisionProcess end - function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySets}, initial_states::InitialStates = AllStates()) state_vars = (Int32(num_target(marginal)),) action_vars = action_shape(marginal) @@ -28,8 +16,15 @@ end """ IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) -A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process -from a single [`IntervalAmbiguitySets`](@ref) object and a specified number of actions. +A convenience constructor for a [`FactoredRobustMarkovDecisionProcess`](@ref) representing an interval Markov decision process, +as IMDPs are a subclass of fRMDPs, from a single [`IntervalAmbiguitySets`](@ref) object and a specified number of actions. + +Formally, an IMDP ``M`` is a tuple ``M = (S, S_0, A, \\Gamma)``, where + +- ``S`` is a finite set of states, +- ``S_0 \\subseteq S`` is a set of initial states, +- ``A`` is a finite set of actions, +- ```\\Gamma = \\{\\Gamma_{s,a}\\}_{s \\in S,a \\in A}`` is a set of ambiguity sets for source-action pair ``(s, a)``, where each ``\\Gamma_{s,a}`` is an _interval_ ambiguity set over ``S``. ### Example ```jldoctest From facd2fb9f435702c824f07d20b0d6374d317547b Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sat, 27 Sep 2025 21:28:10 +0200 Subject: [PATCH 48/71] Move TODOs of DFA --- src/models/DFA.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/models/DFA.jl b/src/models/DFA.jl index febbbcac..1fd1e422 100644 --- a/src/models/DFA.jl +++ b/src/models/DFA.jl @@ -24,15 +24,15 @@ See [`TransitionFunction`](@ref) for more information on the structure of the tr - `accepting_states::VT`: vector of accepting states - `labelmap::DA`: mapping from label to index. 
-TODO: Add explicit sink states for non-accepting self-looping states since we do not need to iterate for these. -TODO: Detection of non-accepting end components. They can be replaced by a single state. - """ struct DFA{T <: TransitionFunction, DA <: AbstractDict{String, Int32}} <: DeterministicAutomaton transition::T # delta : |Q| x |2^{AP}| => |Q| initial_state::Int32 # q_0 labelmap::DA + + # TODO: Add explicit sink states for non-accepting self-looping states since we do not need to iterate for these. + # TODO: Detection of non-accepting end components. They can be replaced by a single state. function DFA( transition::T, From 4373ac5ea71bfdb23097fbbb08332f9f22b67521 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sun, 28 Sep 2025 16:33:27 +0200 Subject: [PATCH 49/71] Fix O-max bellman for small CuSparse. --- ext/IntervalMDPCudaExt.jl | 2 +- ext/cuda/array.jl | 4 + ext/cuda/bellman/dense.jl | 44 ++-- ext/cuda/bellman/sparse.jl | 501 ++++++++++++++++++----------------- test/cuda/sparse/bellman.jl | 508 ++++++++++++++++-------------------- 5 files changed, 522 insertions(+), 537 deletions(-) diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl index f4a45798..0541b3cb 100644 --- a/ext/IntervalMDPCudaExt.jl +++ b/ext/IntervalMDPCudaExt.jl @@ -91,7 +91,7 @@ include("cuda/sorting.jl") include("cuda/workspace.jl") include("cuda/strategy.jl") include("cuda/bellman/dense.jl") -# include("cuda/bellman/sparse.jl") +include("cuda/bellman/sparse.jl") include("cuda/probabilities.jl") include("cuda/specification.jl") diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index 0aa6816a..5d9014f2 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -42,3 +42,7 @@ Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{Tv2}) Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{NTuple{N, T}}) where {Tv1, N, T <: Integer} = adapt(Array{NTuple{N, T}}, x) + +const CuSparseDeviceColumnView{Tv, Ti} = SubArray{Tv, 
1, <:CuSparseDeviceMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} +IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = rowvals(p.gap) +IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = nnz(p.gap) \ No newline at end of file diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index b48af781..bad7540e 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -69,10 +69,10 @@ function dense_bellman_kernel!( action_reduce, ) where {Tv} # Prepare action workspace shared memory - action_workspace = initialize_action_workspace(workspace, strategy_cache, V) + action_workspace = initialize_dense_action_workspace(workspace, strategy_cache, V) # Prepare sorting shared memory - value, perm = initialize_value_and_perm(workspace, strategy_cache, V, marginal) + value, perm = initialize_dense_value_and_perm(workspace, strategy_cache, V, marginal) # Perform sorting dense_initialize_sorting_shared_memory!(V, value, perm) @@ -94,7 +94,7 @@ function dense_bellman_kernel!( return nothing end -@inline function initialize_action_workspace( +@inline function initialize_dense_action_workspace( workspace, ::OptimizingActiveCache, marginal @@ -103,10 +103,12 @@ end nwarps = div(blockDim().x, warpsize()) wid = fld1(threadIdx().x, warpsize()) action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, nwarps)) - @inbounds return @view action_workspace[:, wid] + @inbounds action_workspace = @view action_workspace[:, wid] + + return action_workspace end -@inline function initialize_action_workspace( +@inline function initialize_dense_action_workspace( workspace, ::NonOptimizingActiveCache, marginal @@ -114,7 +116,7 @@ end return nothing end -@inline function initialize_value_and_perm( +@inline function initialize_dense_value_and_perm( workspace, ::OptimizingActiveCache, V::AbstractVector{Tv}, @@ -128,7 +130,7 
@@ end return value, perm end -@inline function initialize_value_and_perm( +@inline function initialize_dense_value_and_perm( workspace, ::NonOptimizingActiveCache, V::AbstractVector{Tv}, @@ -200,7 +202,6 @@ end ) where {Tv} assume(warpsize() == 32) lane = mod1(threadIdx().x, warpsize()) - nwarps = div(blockDim().x, warpsize()) jₐ = one(Int32) @inbounds while jₐ <= action_shape(marginal)[1] @@ -255,25 +256,26 @@ end end @inline function state_action_dense_omaximization!( - V, + V::AbstractVector{R}, value, perm, - ambiguity_set::IntervalMDP.IntervalAmbiguitySet{R, MR}, + ambiguity_set, lane, -) where {R, MR <: AbstractArray} +) where {R} assume(warpsize() == 32) - warp_aligned_length = kernel_nextwarp(IntervalMDP.supportsize(ambiguity_set)) + warp_aligned_length = kernel_nextwarp(num_target(ambiguity_set)) used = zero(R) - gap_value = zero(R) + res_value = zero(R) # Add the lower bound multiplied by the value s = lane @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value - if s <= IntervalMDP.supportsize(ambiguity_set) - gap_value += lower(ambiguity_set, s) * V[s] - used += lower(ambiguity_set, s) + if s <= num_target(ambiguity_set) + l = lower(ambiguity_set, s) + res_value += l * V[s] + used += l end s += warpsize() end @@ -285,7 +287,7 @@ end s = lane @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding gap - g = if s <= IntervalMDP.supportsize(ambiguity_set) + g = if s <= num_target(ambiguity_set) gap(ambiguity_set, perm[s]) else # 0 gap is a neural element @@ -300,9 +302,9 @@ end remaining += g # Update the probability - if s <= IntervalMDP.supportsize(ambiguity_set) + if s <= num_target(ambiguity_set) g = clamp(remaining, zero(R), g) - gap_value += g * value[s] + res_value += g * value[s] remaining -= g end @@ -318,6 +320,6 @@ end end sync_warp() - gap_value = CUDA.reduce_warp(+, gap_value) - return gap_value + res_value = 
CUDA.reduce_warp(+, res_value) + return res_value end \ No newline at end of file diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index 13227f09..a0e38abb 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -1,10 +1,9 @@ function IntervalMDP._bellman_helper!( workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, - Vres, - V, - prob::IntervalProbabilities{Tv}, - stateptr; + Vres::AbstractVector{Tv}, + V::AbstractVector{Tv}, + model; upper_bound = false, maximize = true, ) where {Tv} @@ -17,114 +16,119 @@ function IntervalMDP._bellman_helper!( strategy_cache, Vres, V, - prob, - stateptr; - upper_bound = upper_bound, - maximize = maximize, - ) - return Vres - end - - # Try if we can fit all values and gaps into shared memory - if try_large_sparse_bellman!( - Tv, - Tv, - workspace, - strategy_cache, - Vres, - V, - prob, - stateptr; - upper_bound = upper_bound, - maximize = maximize, - ) - return Vres - end - - # Try if we can fit all values and permutation indices into shared memory (25% less memory relative to (Tv, Tv)) - if try_large_sparse_bellman!( - Tv, - Int32, - workspace, - strategy_cache, - Vres, - V, - prob, - stateptr; + model; upper_bound = upper_bound, maximize = maximize, ) return Vres end - # Try if we can fit permutation indices into shared memory (50% less memory relative to (Tv, Tv)) - if try_large_sparse_bellman!( - Int32, - Int32, - workspace, - strategy_cache, - Vres, - V, - prob, - stateptr; - upper_bound = upper_bound, - maximize = maximize, - ) - return Vres - end - - throw(IntervalMDP.OutOfSharedMemory(workspace.max_nonzeros * 2 * sizeof(Int32))) + # # Try if we can fit all values and gaps into shared memory + # if try_large_sparse_bellman!( + # Tv, + # Tv, + # workspace, + # strategy_cache, + # Vres, + # V, + # model; + # upper_bound = upper_bound, + # maximize = maximize, + # ) + # return Vres + # end + + # # Try if we can fit all values and permutation indices into 
shared memory (25% less memory relative to (Tv, Tv)) + # if try_large_sparse_bellman!( + # Tv, + # Int32, + # workspace, + # strategy_cache, + # Vres, + # V, + # model; + # upper_bound = upper_bound, + # maximize = maximize, + # ) + # return Vres + # end + + # # Try if we can fit permutation indices into shared memory (50% less memory relative to (Tv, Tv)) + # if try_large_sparse_bellman!( + # Int32, + # Int32, + # workspace, + # strategy_cache, + # Vres, + # V, + # model; + # upper_bound = upper_bound, + # maximize = maximize, + # ) + # return Vres + # end + + throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * 2 * sizeof(Int32))) end function try_small_sparse_bellman!( workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, - Vres, - V, - prob::IntervalProbabilities{Tv}, - stateptr; + Vres::AbstractVector{Tv}, + V::AbstractVector{Tv}, + model; upper_bound = false, maximize = true, ) where {Tv} # Execution plan: - # - at least 8 states per block # - one warp per state + # - squeeze as many states as possible in a block # - use shared memory to store the values and gap probability # - use bitonic sort in a warp to sort values_gaps - desired_warps = 8 - shmem = - (workspace.max_nonzeros + workspace.max_actions) * 2 * sizeof(Tv) * desired_warps + n_actions = isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? workspace.num_actions : 1 + marginal = marginals(model)[1] + n_states = source_shape(marginal)[1] + + if IntervalMDP.valuetype(marginal) != Tv + throw(ArgumentError("Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).")) + end + + desired_warps = 32 kernel = @cuda launch = false small_sparse_bellman_kernel!( workspace, active_cache(strategy_cache), Vres, V, - prob, - stateptr, + marginal, upper_bound ? (>=) : (<=), maximize ? 
(max, >, typemin(Tv)) : (min, <, typemax(Tv)), ) - config = launch_configuration(kernel.fun; shmem = shmem) - max_threads = prevwarp(device(), config.threads) + function variable_shmem(threads) + warp_aligned_length = prevwarp(device(), threads) + return (workspace.max_support + n_actions) * 2 * sizeof(Tv) * warp_aligned_length + end + + config = launch_configuration(kernel.fun; shmem = variable_shmem) - if max_threads < desired_warps * 32 + max_threads = prevwarp(device(), config.threads) + if max_threads < 32 return false end - num_states = length(stateptr) - one(Int32) - threads = desired_warps * 32 - blocks = min(2^16 - 1, cld(num_states, desired_warps)) + threads = max_threads + warps = div(threads, 32) + blocks = min(2^16 - 1, cld(n_states, warps)) + shmem = (workspace.max_support + n_actions) * 2 * sizeof(Tv) * warps kernel( workspace, active_cache(strategy_cache), Vres, V, - prob, - stateptr, + marginal, upper_bound ? (>=) : (<=), maximize ? (max, >, typemin(Tv)) : (min, <, typemax(Tv)); blocks = blocks, @@ -138,38 +142,21 @@ end function small_sparse_bellman_kernel!( workspace, strategy_cache, - Vres, + Vres::AbstractVector{Tv}, V, - prob::IntervalProbabilities{Tv}, - stateptr, + marginal, value_lt, action_reduce, ) where {Tv} assume(warpsize() == 32) - nwarps = div(blockDim().x, warpsize()) - action_workspace = CuDynamicSharedArray(Tv, (workspace.max_actions, nwarps)) - value_ws = CuDynamicSharedArray( - Tv, - (workspace.max_nonzeros, nwarps), - workspace.max_actions * nwarps * sizeof(Tv), - ) - gap_ws = CuDynamicSharedArray( - Tv, - (workspace.max_nonzeros, nwarps), - (workspace.max_nonzeros + workspace.max_actions) * nwarps * sizeof(Tv), - ) + action_workspace = initialize_small_sparse_action_workspace(workspace, strategy_cache, marginal) + value_ws, gap_ws = initialize_small_sparse_value_and_gap(workspace, strategy_cache, V, marginal) + nwarps = div(blockDim().x, warpsize()) wid = fld1(threadIdx().x, warpsize()) - - @inbounds action_workspace = @view 
action_workspace[:, wid] - @inbounds value_ws = @view value_ws[:, wid] - @inbounds gap_ws = @view gap_ws[:, wid] - - # Grid-stride loop - num_states = length(stateptr) - one(Int32) - j = wid + (blockIdx().x - one(Int32)) * nwarps - while j <= num_states + jₛ = wid + (blockIdx().x - one(Int32)) * nwarps + @inbounds while jₛ <= source_shape(marginal)[1] # Grid-stride loop state_small_sparse_omaximization!( action_workspace, value_ws, @@ -177,207 +164,257 @@ function small_sparse_bellman_kernel!( strategy_cache, Vres, V, - prob, - stateptr, + marginal, value_lt, action_reduce, - j, + jₛ, ) - j += gridDim().x * nwarps + jₛ += gridDim().x * nwarps end + + return nothing end -@inline function state_small_sparse_omaximization!( - action_workspace, - value_ws, - gap_ws, - strategy_cache, - Vres, - V, - prob, - stateptr, - value_lt, - action_reduce, - jₛ, +@inline function initialize_small_sparse_action_workspace( + workspace, + ::OptimizingActiveCache, + marginal ) - lane = mod1(threadIdx().x, warpsize()) + assume(warpsize() == 32) + nwarps = div(blockDim().x, warpsize()) + wid = fld1(threadIdx().x, warpsize()) - s₁, s₂ = stateptr[jₛ], stateptr[jₛ + one(Int32)] - nactions = s₂ - s₁ - @inbounds action_values = @view action_workspace[1:nactions] + action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, nwarps)) + @inbounds action_workspace = @view action_workspace[:, wid] - k = one(Int32) - @inbounds while k <= nactions - jₐ = s₁ + k - one(Int32) - sum_lowerⱼ = sum_lower(prob)[jₐ] + return action_workspace +end - r = lower(prob).colPtr[jₐ]:(lower(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - lindsⱼ = @view lower(prob).rowVal[r] - lvalsⱼ = @view lower(prob).nzVal[r] +@inline function initialize_small_sparse_action_workspace( + workspace, + ::NonOptimizingActiveCache, + marginal +) + return nothing +end - r = gap(prob).colPtr[jₐ]:(gap(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - gindsⱼ = @view gap(prob).rowVal[r] - gvalsⱼ = @view 
gap(prob).nzVal[r] +@inline function initialize_small_sparse_value_and_gap( + workspace, + ::OptimizingActiveCache, + V::AbstractVector{Tv}, + marginal +) where {Tv} + assume(warpsize() == 32) + nwarps = div(blockDim().x, warpsize()) + wid = fld1(threadIdx().x, warpsize()) + Tv2 = IntervalMDP.valuetype(marginal) - # Use O-maxmization to find the value for the action - v = state_action_small_sparse_omaximization!( - value_ws, - gap_ws, - V, - lindsⱼ, - lvalsⱼ, - gindsⱼ, - gvalsⱼ, - sum_lowerⱼ, - value_lt, - lane, - ) + value_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps), workspace.num_actions * nwarps * sizeof(Tv2)) + @inbounds value_ws = @view value_ws[:, wid] - if lane == one(Int32) - action_values[k] = v - end - sync_warp() + gap_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps), workspace.num_actions * nwarps * sizeof(Tv2) + workspace.max_support * nwarps * sizeof(Tv)) + @inbounds gap_ws = @view gap_ws[:, wid] - k += one(Int32) - end + return value_ws, gap_ws +end - # Find the best action - v = extract_strategy_warp!(strategy_cache, action_values, Vres, jₛ, action_reduce, lane) +@inline function initialize_small_sparse_value_and_gap( + workspace, + ::NonOptimizingActiveCache, + V::AbstractVector{Tv}, + marginal +) where {Tv} + assume(warpsize() == 32) + nwarps = div(blockDim().x, warpsize()) + wid = fld1(threadIdx().x, warpsize()) - if lane == one(Int32) - Vres[jₛ] = v - end - sync_warp() + value_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps)) + @inbounds value_ws = @view value_ws[:, wid] + + gap_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps), workspace.max_support * nwarps * sizeof(Tv)) + @inbounds gap_ws = @view gap_ws[:, wid] + return value_ws, gap_ws end @inline function state_small_sparse_omaximization!( action_workspace, value_ws, gap_ws, - strategy_cache::NonOptimizingActiveCache, + strategy_cache, Vres, V, - prob, - stateptr, + marginal, value_lt, action_reduce, jₛ, ) + assume(warpsize() == 32) 
lane = mod1(threadIdx().x, warpsize()) - @inbounds begin - s₁ = stateptr[jₛ] - jₐ = s₁ + strategy_cache[jₛ] - one(Int32) - sum_lowerⱼ = sum_lower(prob)[jₐ] - - r = lower(prob).colPtr[jₐ]:(lower(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - lindsⱼ = @view lower(prob).rowVal[r] - lvalsⱼ = @view lower(prob).nzVal[r] - - r = gap(prob).colPtr[jₐ]:(gap(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - gindsⱼ = @view gap(prob).rowVal[r] - gvalsⱼ = @view gap(prob).nzVal[r] + jₐ = one(Int32) + @inbounds while jₐ <= action_shape(marginal)[1] + ambiguity_set = marginal[(jₐ,), (jₛ,)] # Use O-maxmization to find the value for the action v = state_action_small_sparse_omaximization!( value_ws, gap_ws, V, - lindsⱼ, - lvalsⱼ, - gindsⱼ, - gvalsⱼ, - sum_lowerⱼ, + ambiguity_set, value_lt, - lane, + lane ) if lane == one(Int32) - Vres[jₛ] = v + action_workspace[jₐ] = v end sync_warp() + + jₐ += one(Int32) end + + # Find the best action + v = extract_strategy_warp!(strategy_cache, action_workspace, Vres, jₛ, action_reduce, lane) + + if lane == one(Int32) + Vres[jₛ] = v + end + sync_warp() end +# @inline function state_small_sparse_omaximization!( +# action_workspace, +# value_ws, +# gap_ws, +# strategy_cache::NonOptimizingActiveCache, +# Vres, +# V, +# prob, +# stateptr, +# value_lt, +# action_reduce, +# jₛ, +# ) +# lane = mod1(threadIdx().x, warpsize()) + +# @inbounds begin +# s₁ = stateptr[jₛ] +# jₐ = s₁ + strategy_cache[jₛ] - one(Int32) +# sum_lowerⱼ = sum_lower(prob)[jₐ] + +# r = lower(prob).colPtr[jₐ]:(lower(prob).colPtr[jₐ + one(Int32)] - one(Int32)) +# lindsⱼ = @view lower(prob).rowVal[r] +# lvalsⱼ = @view lower(prob).nzVal[r] + +# r = gap(prob).colPtr[jₐ]:(gap(prob).colPtr[jₐ + one(Int32)] - one(Int32)) +# gindsⱼ = @view gap(prob).rowVal[r] +# gvalsⱼ = @view gap(prob).nzVal[r] + +# # Use O-maxmization to find the value for the action +# v = state_action_small_sparse_omaximization!( +# value_ws, +# gap_ws, +# V, +# lindsⱼ, +# lvalsⱼ, +# gindsⱼ, +# gvalsⱼ, +# sum_lowerⱼ, +# value_lt, 
+# lane, +# ) + +# if lane == one(Int32) +# Vres[jₛ] = v +# end +# sync_warp() +# end +# end + @inline function state_action_small_sparse_omaximization!( value_ws, gap_ws, V, - lower_inds, - lower_vals, - gap_inds, - gap_vals, - sum_lower::Tv, + ambiguity_set, value_lt, - lane, -) where {Tv} - value = add_lower_mul_V_warp(V, lower_inds, lower_vals, lane) - - small_sparse_initialize_sorting_shared_memory!(V, gap_inds, gap_vals, value_ws, gap_ws) + lane +) + small_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value_ws, gap_ws, lane) - @inbounds valueⱼ = @view value_ws[1:length(gap_inds)] - @inbounds gapⱼ = @view gap_ws[1:length(gap_inds)] + @inbounds valueⱼ = @view value_ws[1:IntervalMDP.supportsize(ambiguity_set)] + @inbounds gapⱼ = @view gap_ws[1:IntervalMDP.supportsize(ambiguity_set)] warp_bitonic_sort!(valueⱼ, gapⱼ, value_lt) - value += small_add_gap_mul_V_sparse(valueⱼ, gapⱼ, sum_lower, lane) + value, remaining = add_lower_mul_V_warp(V, ambiguity_set, lane) + value += small_add_gap_mul_V_sparse(valueⱼ, gapⱼ, remaining, lane) return value end -@inline function add_lower_mul_V_warp( - V::AbstractVector{Tv}, - lower_inds, - lower_vals, - lane, -) where {Tv} +@inline function small_sparse_initialize_sorting_shared_memory!( + V, + ambiguity_set, + value, + prob, + lane +) assume(warpsize() == 32) + warp_aligned_length = kernel_nextwarp(IntervalMDP.supportsize(ambiguity_set)) - warp_aligned_length = kernel_nextwarp(length(lower_vals)) - lower_value = zero(Tv) + support = IntervalMDP.support(ambiguity_set) + # Copy into shared memory s = lane @inbounds while s <= warp_aligned_length - # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value - val = if s <= length(lower_vals) - lower_vals[s] * V[lower_inds[s]] - else - zero(Tv) + if s <= IntervalMDP.supportsize(ambiguity_set) + idx = support[s] + value[s] = V[idx] + prob[s] = gap(ambiguity_set, idx) end - lower_value += val - s += warpsize() end - lower_value = 
CUDA.reduce_warp(+, lower_value) - return lower_value + # Need to synchronize to make sure all agree on the shared memory + sync_warp() end -@inline function small_sparse_initialize_sorting_shared_memory!( - V, - gapinds, - gapvals, - value, - prob, -) +@inline function add_lower_mul_V_warp( + V::AbstractVector{R}, + ambiguity_set, + lane, +) where {R} assume(warpsize() == 32) + warp_aligned_length = kernel_nextwarp(IntervalMDP.supportsize(ambiguity_set)) - # Copy into shared memory - i = mod1(threadIdx().x, warpsize()) - @inbounds while i <= length(gapinds) - value[i] = V[gapinds[i]] - prob[i] = gapvals[i] - i += warpsize() + used = zero(R) + lower_value = zero(R) + support = IntervalMDP.support(ambiguity_set) + + # Add the lower bound multiplied by the value + s = lane + @inbounds while s <= warp_aligned_length + # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value + if s <= IntervalMDP.supportsize(ambiguity_set) + idx = support[s] + l = lower(ambiguity_set, idx) + lower_value += l * V[idx] + used += l + end + s += warpsize() end + used = CUDA.reduce_warp(+, used) + used = shfl_sync(0xffffffff, used, one(Int32)) + remaining = one(R) - used - # Need to synchronize to make sure all agree on the shared memory - sync_warp() + lower_value = CUDA.reduce_warp(+, lower_value) + + return lower_value, remaining end -@inline function small_add_gap_mul_V_sparse(value, prob, sum_lower::Tv, lane) where {Tv} +@inline function small_add_gap_mul_V_sparse(value, prob, remaining::Tv, lane) where {Tv} assume(warpsize() == 32) warp_aligned_length = kernel_nextwarp(length(prob)) - @inbounds remaining = one(Tv) - sum_lower gap_value = zero(Tv) s = lane @@ -424,10 +461,9 @@ function try_large_sparse_bellman!( ::Type{T2}, workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, - Vres, - V, - prob::IntervalProbabilities{Tv}, - stateptr; + Vres::AbstractVector{Tv}, + V::AbstractVector{Tv}, + model; upper_bound = 
false, maximize = true, ) where {Tv, T1, T2} @@ -437,7 +473,7 @@ function try_large_sparse_bellman!( # - use bitonic sort in a block to sort the values shmem = - workspace.max_nonzeros * (sizeof(T1) + sizeof(T2)) + + workspace.max_support * (sizeof(T1) + sizeof(T2)) + workspace.max_actions * sizeof(Tv) kernel = @cuda launch = false large_sparse_bellman_kernel!( @@ -460,7 +496,7 @@ function try_large_sparse_bellman!( return false end - wanted_threads = min(1024, nextwarp(device(), cld(workspace.max_nonzeros, 2))) + wanted_threads = min(1024, nextwarp(device(), cld(workspace.max_support, 2))) num_states = length(stateptr) - one(Int32) threads = min(max_threads, wanted_threads) @@ -490,20 +526,19 @@ function large_sparse_bellman_kernel!( ::Type{T2}, workspace, strategy_cache, - Vres, + Vres::AbstractVector{Tv}, V, - prob::IntervalProbabilities{Tv}, - stateptr, + marginal, value_lt, action_reduce, ) where {Tv, T1, T2} action_workspace = CuDynamicSharedArray(Tv, workspace.max_actions) value_ws = - CuDynamicSharedArray(T1, workspace.max_nonzeros, workspace.max_actions * sizeof(Tv)) + CuDynamicSharedArray(T1, workspace.max_support, workspace.max_actions * sizeof(Tv)) gap_ws = CuDynamicSharedArray( T2, - workspace.max_nonzeros, - workspace.max_nonzeros * sizeof(T1) + workspace.max_actions * sizeof(Tv), + workspace.max_support, + workspace.max_support * sizeof(T1) + workspace.max_actions * sizeof(Tv), ) # Grid-stride loop diff --git a/test/cuda/sparse/bellman.jl b/test/cuda/sparse/bellman.jl index 5b64cf91..0f20de34 100644 --- a/test/cuda/sparse/bellman.jl +++ b/test/cuda/sparse/bellman.jl @@ -3,319 +3,263 @@ using IntervalMDP, SparseArrays, CUDA using StatsBase using Random: MersenneTwister -for N in [Float32, Float64, Rational{BigInt}] +for N in [Float32, Float64] @testset "N = $N" begin - prob = IntervalProbabilities(; - lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]), - upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]), + prob = IntervalAmbiguitySets(; + 
lower = sparse(N[ + 0 1//2 + 1//10 3//10 + 2//10 1//10 + ]), + upper = sparse(N[ + 5//10 7//10 + 6//10 5//10 + 7//10 3//10 + ]), ) + prob = IntervalMDP.cu(prob) - V = N[1, 2, 3] + V = IntervalMDP.cu(N[1, 2, 3]) #### Maximization @testset "maximization" begin ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) + Vres = CUDA.zeros(N, 2) IntervalMDP._bellman_helper!( ws, strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = true, ) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] - - ws = IntervalMDP.DenseWorkspace(gap(prob), 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ N[27 // 10, 17 // 10] - - ws = IntervalMDP.ThreadedDenseWorkspace(gap(prob), 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = true, - ) - @test Vres ≈ N[27 // 10, 17 // 10] end #### Minimization @testset "minimization" begin ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) + Vres = CUDA.zeros(N, 2) IntervalMDP._bellman_helper!( ws, strategy_cache, Vres, V, - prob, - stateptr(prob); + prob; upper_bound = false, ) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] - - ws = IntervalMDP.DenseWorkspace(gap(prob), 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - 
@test Vres ≈ N[17 // 10, 15 // 10] - - ws = IntervalMDP.ThreadedDenseWorkspace(gap(prob), 1) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - @test Vres ≈ N[17 // 10, 15 // 10] end end end -#### Large matrices -@testset "large matrices" begin - function sample_sparse_interval_probabilities(rng, n, m, nnz_per_column) - prob_split = 1 / nnz_per_column - - rand_val_lower = rand(rng, Float64, nnz_per_column * m) .* prob_split - rand_val_upper = rand(rng, Float64, nnz_per_column * m) .* prob_split .+ prob_split - rand_index = collect(1:nnz_per_column) - - row_vals = Vector{Int32}(undef, nnz_per_column * m) - col_ptrs = Int32[1; collect(1:m) .* nnz_per_column .+ 1] - - for j in 1:m - StatsBase.seqsample_a!(rng, 1:n, rand_index) # Select nnz_per_column elements from 1:n - sort!(rand_index) - - row_vals[((j - 1) * nnz_per_column + 1):(j * nnz_per_column)] .= rand_index - end - - lower = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_lower) - upper = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_upper) - - prob = IntervalProbabilities(; lower = lower, upper = upper) - V = rand(rng, Float64, n) - - cuda_prob = IntervalMDP.cu(prob) - cuda_V = IntervalMDP.cu(V) - - return prob, V, cuda_prob, cuda_V - end - - # Many columns - @testset "many columns" begin - rng = MersenneTwister(55392) - - n = 100 - m = 1000000 # It has to be greater than 8 * 2^16 to exceed maximum grid size - nnz_per_column = 2 - prob, V, cuda_prob, cuda_V = - sample_sparse_interval_probabilities(rng, n, m, nnz_per_column) - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_cpu, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - - ws = 
IntervalMDP.construct_workspace(cuda_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) - V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_gpu, - cuda_V, - cuda_prob, - stateptr(cuda_prob); - upper_bound = false, - ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing - - @test V_cpu ≈ V_gpu - end - - # Many non-zeros - @testset "many non-zeros" begin - rng = MersenneTwister(55392) - - n = 100000 - m = 10 - nnz_per_column = 1500 # It has to be greater than 187 to fill shared memory with 8 states per block. - prob, V, cuda_prob, cuda_V = - sample_sparse_interval_probabilities(rng, n, m, nnz_per_column) - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_cpu, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - - ws = IntervalMDP.construct_workspace(cuda_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) - V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_gpu, - cuda_V, - cuda_prob, - stateptr(cuda_prob); - upper_bound = false, - ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing - - @test V_cpu ≈ V_gpu - end - - # More non-zeros - @testset "more non-zeros" begin - rng = MersenneTwister(55392) - - n = 100000 - m = 10 - nnz_per_column = 4000 # It has to be greater than 3800 to exceed shared memory for ff implementation - prob, V, cuda_prob, cuda_V = - sample_sparse_interval_probabilities(rng, n, m, nnz_per_column) - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_cpu, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - - ws = IntervalMDP.construct_workspace(cuda_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) - 
V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_gpu, - cuda_V, - cuda_prob, - stateptr(cuda_prob); - upper_bound = false, - ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing - - @test V_cpu ≈ V_gpu - end - - # Most non-zeros - @testset "most non-zeros" begin - rng = MersenneTwister(55392) - - n = 100000 - m = 10 - nnz_per_column = 6000 # It has to be greater than 5800 to exceed shared memory for fi implementation - prob, V, cuda_prob, cuda_V = - sample_sparse_interval_probabilities(rng, n, m, nnz_per_column) - - ws = IntervalMDP.construct_workspace(prob) - strategy_cache = IntervalMDP.construct_strategy_cache(prob) - V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_cpu, - V, - prob, - stateptr(prob); - upper_bound = false, - ) - - ws = IntervalMDP.construct_workspace(cuda_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) - V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_gpu, - cuda_V, - cuda_prob, - stateptr(cuda_prob); - upper_bound = false, - ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing - - @test V_cpu ≈ V_gpu - end - - # Too many non-zeros - @testset "too many non-zeros" begin - rng = MersenneTwister(55392) - - n = 100000 - m = 10 - nnz_per_column = 8000 # It has to be greater than 7800 to exceed shared memory for ii implementation - prob, V, cuda_prob, cuda_V = - sample_sparse_interval_probabilities(rng, n, m, nnz_per_column) - - ws = IntervalMDP.construct_workspace(cuda_prob) - strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) - V_gpu = CUDA.zeros(Float64, m) - @test_throws IntervalMDP.OutOfSharedMemory IntervalMDP._bellman_helper!( - ws, - strategy_cache, - V_gpu, - cuda_V, - cuda_prob, - stateptr(cuda_prob); - upper_bound = false, - ) - end -end +# #### Large matrices +# @testset "large matrices" begin +# function sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) +# 
prob_split = 1 / nnz_per_column + +# rand_val_lower = rand(rng, Float64, nnz_per_column * m) .* prob_split +# rand_val_upper = rand(rng, Float64, nnz_per_column * m) .* prob_split .+ prob_split +# rand_index = collect(1:nnz_per_column) + +# row_vals = Vector{Int32}(undef, nnz_per_column * m) +# col_ptrs = Int32[1; collect(1:m) .* nnz_per_column .+ 1] + +# for j in 1:m +# StatsBase.seqsample_a!(rng, 1:n, rand_index) # Select nnz_per_column elements from 1:n +# sort!(rand_index) + +# row_vals[((j - 1) * nnz_per_column + 1):(j * nnz_per_column)] .= rand_index +# end + +# lower = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_lower) +# upper = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_upper) + +# prob = IntervalAmbiguitySets(; lower = lower, upper = upper) +# V = rand(rng, Float64, n) + +# cuda_prob = IntervalMDP.cu(prob) +# cuda_V = IntervalMDP.cu(V) + +# return prob, V, cuda_prob, cuda_V +# end + +# # Many columns +# @testset "many columns" begin +# rng = MersenneTwister(55392) + +# n = 100 +# m = 1000000 # It has to be greater than 8 * 2^16 to exceed maximum grid size +# nnz_per_column = 2 +# prob, V, cuda_prob, cuda_V = +# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + +# ws = IntervalMDP.construct_workspace(prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(prob) +# V_cpu = zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_cpu, +# V, +# prob; +# upper_bound = false, +# ) + +# ws = IntervalMDP.construct_workspace(cuda_prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) +# V_gpu = CUDA.zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_gpu, +# cuda_V, +# cuda_prob; +# upper_bound = false, +# ) +# V_gpu = Vector(V_gpu) # Convert to CPU for testing + +# @test V_cpu ≈ V_gpu +# end + +# # Many non-zeros +# @testset "many non-zeros" begin +# rng = MersenneTwister(55392) + +# n = 100000 +# m = 10 +# 
nnz_per_column = 1500 # It has to be greater than 187 to fill shared memory with 8 states per block. +# prob, V, cuda_prob, cuda_V = +# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + +# ws = IntervalMDP.construct_workspace(prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(prob) +# V_cpu = zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_cpu, +# V, +# prob; +# upper_bound = false, +# ) + +# ws = IntervalMDP.construct_workspace(cuda_prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) +# V_gpu = CUDA.zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_gpu, +# cuda_V, +# cuda_prob; +# upper_bound = false, +# ) +# V_gpu = Vector(V_gpu) # Convert to CPU for testing + +# @test V_cpu ≈ V_gpu +# end + +# # More non-zeros +# @testset "more non-zeros" begin +# rng = MersenneTwister(55392) + +# n = 100000 +# m = 10 +# nnz_per_column = 4000 # It has to be greater than 3800 to exceed shared memory for ff implementation +# prob, V, cuda_prob, cuda_V = +# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + +# ws = IntervalMDP.construct_workspace(prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(prob) +# V_cpu = zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_cpu, +# V, +# prob; +# upper_bound = false, +# ) + +# ws = IntervalMDP.construct_workspace(cuda_prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) +# V_gpu = CUDA.zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_gpu, +# cuda_V, +# cuda_prob; +# upper_bound = false, +# ) +# V_gpu = Vector(V_gpu) # Convert to CPU for testing + +# @test V_cpu ≈ V_gpu +# end + +# # Most non-zeros +# @testset "most non-zeros" begin +# rng = MersenneTwister(55392) + +# n = 100000 +# m = 10 +# nnz_per_column = 6000 # It has to be greater than 5800 to exceed shared memory for fi implementation +# prob, V, cuda_prob, 
cuda_V = +# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + +# ws = IntervalMDP.construct_workspace(prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(prob) +# V_cpu = zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_cpu, +# V, +# prob; +# upper_bound = false, +# ) + +# ws = IntervalMDP.construct_workspace(cuda_prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) +# V_gpu = CUDA.zeros(Float64, m) +# IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_gpu, +# cuda_V, +# cuda_prob; +# upper_bound = false, +# ) +# V_gpu = Vector(V_gpu) # Convert to CPU for testing + +# @test V_cpu ≈ V_gpu +# end + +# # Too many non-zeros +# @testset "too many non-zeros" begin +# rng = MersenneTwister(55392) + +# n = 100000 +# m = 10 +# nnz_per_column = 8000 # It has to be greater than 7800 to exceed shared memory for ii implementation +# prob, V, cuda_prob, cuda_V = +# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + +# ws = IntervalMDP.construct_workspace(cuda_prob) +# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) +# V_gpu = CUDA.zeros(Float64, m) +# @test_throws IntervalMDP.OutOfSharedMemory IntervalMDP._bellman_helper!( +# ws, +# strategy_cache, +# V_gpu, +# cuda_V, +# cuda_prob; +# upper_bound = false, +# ) +# end +# end From fe25251a285aa052f590634703f2769c987e3ef5 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sun, 28 Sep 2025 20:31:44 +0200 Subject: [PATCH 50/71] WIP large CuSparse kernel --- ext/cuda/array.jl | 43 +- ext/cuda/bellman/sparse.jl | 615 +++++++++------------ ext/cuda/workspace.jl | 2 +- src/probabilities/IntervalAmbiguitySets.jl | 6 +- src/workspace.jl | 4 +- test/cuda/sparse/bellman.jl | 412 +++++++------- 6 files changed, 504 insertions(+), 578 deletions(-) diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index 5d9014f2..4b36a2a5 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -1,3 +1,42 @@ +function 
maxdiff(colptr::CuVector{Int32}) + return reducediff(max, colptr, typemin(Int32)) +end + +function reducediff(op, colptr::CuVector{Int32}, neutral) + ret_arr = CuArray{Int32}(undef, 1) + kernel = @cuda launch = false reducediff_kernel!(op, colptr, neutral, ret_arr) + + config = launch_configuration(kernel.fun) + max_threads = prevwarp(device(), config.threads) + wanted_threads = min(1024, nextwarp(device(), length(colptr) - 1)) + + threads = min(max_threads, wanted_threads) + blocks = 1 + + kernel(op, colptr, neutral, ret_arr; blocks = blocks, threads = threads) + + return CUDA.@allowscalar ret_arr[1] +end + +function reducediff_kernel!(op, colptr, neutral, retarr) + diff = neutral + + i = threadIdx().x + @inbounds while i <= length(colptr) - 1 + diff = op(diff, colptr[i + 1] - colptr[i]) + i += blockDim().x + end + + shuffle = Val(true) + diff = CUDA.reduce_block(op, diff, neutral, shuffle) + + if threadIdx().x == 1 + @inbounds retarr[1] = diff + end + + return +end + # This is type piracy - please port upstream to CUDA when FixedSparseCSC are stable. 
CUDA.CUSPARSE.CuSparseMatrixCSC{Tv, Ti}(M::SparseArrays.FixedSparseCSC) where {Tv, Ti} = CuSparseMatrixCSC{Tv, Ti}( @@ -45,4 +84,6 @@ Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{NTupl const CuSparseDeviceColumnView{Tv, Ti} = SubArray{Tv, 1, <:CuSparseDeviceMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = rowvals(p.gap) -IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = nnz(p.gap) \ No newline at end of file +IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = nnz(p.gap) + +IntervalMDP.maxsupportsize(p::IntervalMDP.IntervalAmbiguitySets{R, <:CuSparseMatrixCSC{R}}) where {R} = maxdiff(SparseArrays.getcolptr(p.gap)) diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index a0e38abb..b8c87d02 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -23,50 +23,50 @@ function IntervalMDP._bellman_helper!( return Vres end - # # Try if we can fit all values and gaps into shared memory - # if try_large_sparse_bellman!( - # Tv, - # Tv, - # workspace, - # strategy_cache, - # Vres, - # V, - # model; - # upper_bound = upper_bound, - # maximize = maximize, - # ) - # return Vres - # end - - # # Try if we can fit all values and permutation indices into shared memory (25% less memory relative to (Tv, Tv)) - # if try_large_sparse_bellman!( - # Tv, - # Int32, - # workspace, - # strategy_cache, - # Vres, - # V, - # model; - # upper_bound = upper_bound, - # maximize = maximize, - # ) - # return Vres - # end - - # # Try if we can fit permutation indices into shared memory (50% less memory relative to (Tv, Tv)) - # if try_large_sparse_bellman!( - # Int32, - # Int32, - # workspace, - # strategy_cache, - # Vres, - # V, - # model; - # upper_bound = upper_bound, - # maximize = maximize, - # ) - # return 
Vres - # end + # Try if we can fit all values and gaps into shared memory + if try_large_sparse_bellman!( + Tv, + Tv, + workspace, + strategy_cache, + Vres, + V, + model; + upper_bound = upper_bound, + maximize = maximize, + ) + return Vres + end + + # Try if we can fit all values and permutation indices into shared memory (25% less memory relative to (Tv, Tv)) + if try_large_sparse_bellman!( + Tv, + Int32, + workspace, + strategy_cache, + Vres, + V, + model; + upper_bound = upper_bound, + maximize = maximize, + ) + return Vres + end + + # Try if we can fit permutation indices into shared memory (50% less memory relative to (Tv, Tv)) + if try_large_sparse_bellman!( + Int32, + Int32, + workspace, + strategy_cache, + Vres, + V, + model; + upper_bound = upper_bound, + maximize = maximize, + ) + return Vres + end throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * 2 * sizeof(Int32))) end @@ -94,8 +94,6 @@ function try_small_sparse_bellman!( throw(ArgumentError("Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).")) end - desired_warps = 32 - kernel = @cuda launch = false small_sparse_bellman_kernel!( workspace, active_cache(strategy_cache), @@ -107,8 +105,8 @@ function try_small_sparse_bellman!( ) function variable_shmem(threads) - warp_aligned_length = prevwarp(device(), threads) - return (workspace.max_support + n_actions) * 2 * sizeof(Tv) * warp_aligned_length + warps = div(threads, 32) + return (workspace.max_support + n_actions) * 2 * sizeof(Tv) * warps end config = launch_configuration(kernel.fun; shmem = variable_shmem) @@ -282,54 +280,33 @@ end sync_warp() end -# @inline function state_small_sparse_omaximization!( -# action_workspace, -# value_ws, -# gap_ws, -# strategy_cache::NonOptimizingActiveCache, -# Vres, -# V, -# prob, -# stateptr, -# value_lt, -# action_reduce, -# jₛ, -# ) -# lane = mod1(threadIdx().x, warpsize()) - -# @inbounds begin -# s₁ = stateptr[jₛ] -# jₐ = s₁ + 
strategy_cache[jₛ] - one(Int32) -# sum_lowerⱼ = sum_lower(prob)[jₐ] - -# r = lower(prob).colPtr[jₐ]:(lower(prob).colPtr[jₐ + one(Int32)] - one(Int32)) -# lindsⱼ = @view lower(prob).rowVal[r] -# lvalsⱼ = @view lower(prob).nzVal[r] - -# r = gap(prob).colPtr[jₐ]:(gap(prob).colPtr[jₐ + one(Int32)] - one(Int32)) -# gindsⱼ = @view gap(prob).rowVal[r] -# gvalsⱼ = @view gap(prob).nzVal[r] - -# # Use O-maxmization to find the value for the action -# v = state_action_small_sparse_omaximization!( -# value_ws, -# gap_ws, -# V, -# lindsⱼ, -# lvalsⱼ, -# gindsⱼ, -# gvalsⱼ, -# sum_lowerⱼ, -# value_lt, -# lane, -# ) - -# if lane == one(Int32) -# Vres[jₛ] = v -# end -# sync_warp() -# end -# end +@inline function state_small_sparse_omaximization!( + action_workspace, + value_ws, + gap_ws, + strategy_cache::NonOptimizingActiveCache, + Vres, + V, + marginal, + value_lt, + action_reduce, + jₛ, +) + lane = mod1(threadIdx().x, warpsize()) + + @inbounds begin + jₐ = Int32.(strategy_cache[jₛ]) + ambiguity_set = marginal[jₐ, (jₛ,)] + + # Use O-maxmization to find the value for the action + v = state_action_small_sparse_omaximization!(value_ws, gap_ws, V, ambiguity_set, value_lt, lane) + + if lane == one(Int32) + Vres[jₛ] = v + end + sync_warp() + end +end @inline function state_action_small_sparse_omaximization!( value_ws, @@ -470,11 +447,13 @@ function try_large_sparse_bellman!( # Execution plan: # - one state per block # - use shared memory to store the values/value_perm and gap probability/gap_perm - # - use bitonic sort in a block to sort the values + # - use bitonic sort in a block to sort the values - shmem = - workspace.max_support * (sizeof(T1) + sizeof(T2)) + - workspace.max_actions * sizeof(Tv) + n_actions = isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? 
workspace.num_actions : 1 + marginal = marginals(model)[1] + n_states = source_shape(marginal)[1] + + shmem = workspace.max_support * (sizeof(T1) + sizeof(T2)) + n_actions * sizeof(Tv) kernel = @cuda launch = false large_sparse_bellman_kernel!( T1, @@ -483,24 +462,21 @@ function try_large_sparse_bellman!( active_cache(strategy_cache), Vres, V, - prob, - stateptr, + marginal, upper_bound ? (>=) : (<=), maximize ? (max, >, typemin(Tv)) : (min, <, typemax(Tv)), ) config = launch_configuration(kernel.fun; shmem = shmem) - max_threads = prevwarp(device(), config.threads) + max_threads = prevwarp(device(), config.threads) # 1600008 bytes if max_threads < 32 return false end - wanted_threads = min(1024, nextwarp(device(), cld(workspace.max_support, 2))) - - num_states = length(stateptr) - one(Int32) - threads = min(max_threads, wanted_threads) - blocks = min(2^16 - 1, num_states) + wanted_threads = nextwarp(device(), workspace.max_support) + threads = min(1024, max_threads, wanted_threads) + blocks = min(2^16 - 1, n_states) kernel( T1, @@ -509,8 +485,7 @@ function try_large_sparse_bellman!( active_cache(strategy_cache), Vres, V, - prob, - stateptr, + marginal, upper_bound ? (>=) : (<=), maximize ? 
(max, >, typemin(Tv)) : (min, <, typemax(Tv)); blocks = blocks, @@ -532,19 +507,11 @@ function large_sparse_bellman_kernel!( value_lt, action_reduce, ) where {Tv, T1, T2} - action_workspace = CuDynamicSharedArray(Tv, workspace.max_actions) - value_ws = - CuDynamicSharedArray(T1, workspace.max_support, workspace.max_actions * sizeof(Tv)) - gap_ws = CuDynamicSharedArray( - T2, - workspace.max_support, - workspace.max_support * sizeof(T1) + workspace.max_actions * sizeof(Tv), - ) + action_workspace = initialize_large_sparse_action_workspace(workspace, strategy_cache, marginal) + value_ws, gap_ws = initialize_large_sparse_value_and_gap(T1, T2, workspace, strategy_cache, V, marginal) - # Grid-stride loop - num_states = length(stateptr) - one(Int32) - j = blockIdx().x - @inbounds while j <= num_states + jₛ = blockIdx().x + @inbounds while jₛ <= source_shape(marginal)[1] # Grid-stride loop state_sparse_omaximization!( action_workspace, value_ws, @@ -552,18 +519,64 @@ function large_sparse_bellman_kernel!( strategy_cache, Vres, V, - prob, - stateptr, + marginal, value_lt, action_reduce, - j, + jₛ, ) - j += gridDim().x + jₛ += gridDim().x end return nothing end +@inline function initialize_large_sparse_action_workspace( + workspace, + ::OptimizingActiveCache, + marginal +) + action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), workspace.num_actions) + return action_workspace +end + +@inline function initialize_large_sparse_action_workspace( + workspace, + ::NonOptimizingActiveCache, + marginal +) + return nothing +end + +@inline function initialize_large_sparse_value_and_gap( + ::Type{T1}, + ::Type{T2}, + workspace, + ::OptimizingActiveCache, + V, + marginal +) where {T1, T2} + Tv = IntervalMDP.valuetype(marginal) + + value_ws = CuDynamicSharedArray(T1, workspace.max_support, workspace.num_actions * sizeof(Tv)) + gap_ws = CuDynamicSharedArray(T2, workspace.max_support, workspace.num_actions * sizeof(Tv) + workspace.max_support * sizeof(T1)) + + return 
value_ws, gap_ws +end + +@inline function initialize_large_sparse_value_and_gap( + ::Type{T1}, + ::Type{T2}, + workspace, + ::NonOptimizingActiveCache, + V, + marginal +) where {T1, T2} + value_ws = CuDynamicSharedArray(T1, workspace.max_support) + gap_ws = CuDynamicSharedArray(T2, workspace.max_support, workspace.max_support * sizeof(T1)) + + return value_ws, gap_ws +end + @inline function state_sparse_omaximization!( action_workspace, value_ws, @@ -571,59 +584,40 @@ end strategy_cache, Vres, V, - prob, - stateptr, + marginal, value_lt, action_reduce, jₛ, ) - wid, lane = fldmod1(threadIdx().x, warpsize()) - - s₁, s₂ = stateptr[jₛ], stateptr[jₛ + one(Int32)] - nactions = s₂ - s₁ - @inbounds action_values = @view action_workspace[1:nactions] - - k = one(Int32) - @inbounds while k <= nactions - jₐ = s₁ + k - one(Int32) - sum_lowerⱼ = sum_lower(prob)[jₐ] - - r = lower(prob).colPtr[jₐ]:(lower(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - lindsⱼ = @view lower(prob).rowVal[r] - lvalsⱼ = @view lower(prob).nzVal[r] + assume(warpsize() == 32) - r = gap(prob).colPtr[jₐ]:(gap(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - gindsⱼ = @view gap(prob).rowVal[r] - gvalsⱼ = @view gap(prob).nzVal[r] + jₐ = one(Int32) + @inbounds while jₐ <= action_shape(marginal)[1] + ambiguity_set = marginal[(jₐ,), (jₛ,)] # Use O-maxmization to find the value for the action v = state_action_sparse_omaximization!( value_ws, gap_ws, V, - lindsⱼ, - lvalsⱼ, - gindsⱼ, - gvalsⱼ, - sum_lowerⱼ, - value_lt, - wid, - lane, + ambiguity_set, + value_lt ) if threadIdx().x == one(Int32) - action_values[k] = v + action_workspace[jₐ] = v end sync_threads() - k += one(Int32) + jₐ += one(Int32) end # Find the best action + wid, lane = fldmod1(threadIdx().x, warpsize()) if wid == one(Int32) v = extract_strategy_warp!( strategy_cache, - action_values, + action_workspace, Vres, jₛ, action_reduce, @@ -644,40 +638,22 @@ end strategy_cache::NonOptimizingActiveCache, Vres, V, - prob, - stateptr, + marginal, value_lt, 
action_reduce, jₛ, ) - wid, lane = fldmod1(threadIdx().x, warpsize()) - @inbounds begin - s₁ = stateptr[jₛ] - jₐ = s₁ + strategy_cache[jₛ] - one(Int32) - sum_lowerⱼ = sum_lower(prob)[jₐ] - - r = lower(prob).colPtr[jₐ]:(lower(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - lindsⱼ = @view lower(prob).rowVal[r] - lvalsⱼ = @view lower(prob).nzVal[r] - - r = gap(prob).colPtr[jₐ]:(gap(prob).colPtr[jₐ + one(Int32)] - one(Int32)) - gindsⱼ = @view gap(prob).rowVal[r] - gvalsⱼ = @view gap(prob).nzVal[r] + jₐ = Int32.(strategy_cache[jₛ]) + ambiguity_set = marginal[jₐ, (jₛ,)] # Use O-maxmization to find the value for the action v = state_action_sparse_omaximization!( value_ws, gap_ws, V, - lindsⱼ, - lvalsⱼ, - gindsⱼ, - gvalsⱼ, - sum_lowerⱼ, + ambiguity_set, value_lt, - wid, - lane, ) if threadIdx().x == one(Int32) @@ -691,92 +667,74 @@ end value_ws::AbstractVector{Tv}, gap_ws::AbstractVector{Tv}, V, - lower_inds, - lower_vals, - gap_inds, - gap_vals, - sum_lower::Tv, + ambiguity_set, value_lt, - wid, - lane, ) where {Tv} - reduction_ws = CuStaticSharedArray(Tv, 32) - - value = add_lower_mul_V_block(reduction_ws, V, lower_inds, lower_vals, wid, lane) - - ff_sparse_initialize_sorting_shared_memory!(V, gap_inds, gap_vals, value_ws, gap_ws) + ff_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value_ws, gap_ws) - valueⱼ = @view value_ws[1:length(gap_inds)] - probⱼ = @view gap_ws[1:length(gap_inds)] + valueⱼ = @view value_ws[1:IntervalMDP.supportsize(ambiguity_set)] + probⱼ = @view gap_ws[1:IntervalMDP.supportsize(ambiguity_set)] block_bitonic_sort!(valueⱼ, probⱼ, value_lt) - value += ff_add_gap_mul_V_sparse(reduction_ws, valueⱼ, probⱼ, sum_lower) + + value, remaining = add_lower_mul_V_block(V, ambiguity_set) + value += ff_add_gap_mul_V_sparse(valueⱼ, probⱼ, remaining) return value end @inline function add_lower_mul_V_block( - reduction_ws, - V::AbstractVector{Tv}, - lower_inds, - lower_vals, - wid, - lane, -) where {Tv} - assume(warpsize() == 32) + V::AbstractVector{R}, + 
ambiguity_set, +) where {R} + share_ws = CuStaticSharedArray(R, 1) - warp_aligned_length = kernel_nextwarp(length(lower_vals)) - lower_value = zero(Tv) + supportsize = IntervalMDP.supportsize(ambiguity_set) + + used = zero(R) + lower_value = zero(R) + support = IntervalMDP.support(ambiguity_set) + # Add the lower bound multiplied by the value s = threadIdx().x - @inbounds while s <= warp_aligned_length + @inbounds while s <= supportsize # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value - val = if s <= length(lower_vals) - lower_vals[s] * V[lower_inds[s]] - else - zero(Tv) - end - lower_value += val - + idx = support[s] + l = lower(ambiguity_set, idx) + lower_value += l * V[idx] + used += l s += blockDim().x end - # Warp-reduction - lower_value = CUDA.reduce_warp(+, lower_value) - sync_threads() - - # Block-reduction - if wid == one(Int32) - reduction_ws[lane] = zero(Tv) - end - sync_threads() + used = CUDA.reduce_block(+, used, zero(R), Val(true)) + lower_value = CUDA.reduce_block(+, lower_value, zero(R), Val(true)) - if lane == one(Int32) - reduction_ws[wid] = lower_value + if threadIdx().x == one(Int32) + share_ws[1] = used # No need to share lower_value since it is only used by the first thread end sync_threads() - if wid == one(Int32) - lower_value = reduction_ws[lane] - lower_value = CUDA.reduce_warp(+, lower_value) - end - sync_threads() + used = share_ws[1] + remaining = one(R) - used - return lower_value + return lower_value, remaining end @inline function ff_sparse_initialize_sorting_shared_memory!( V, - gapinds, - gapvals, + ambiguity_set, value, prob, ) + support = IntervalMDP.support(ambiguity_set) + supportsize = IntervalMDP.supportsize(ambiguity_set) + # Copy into shared memory - i = threadIdx().x - @inbounds while i <= length(gapinds) - value[i] = V[gapinds[i]] - prob[i] = gapvals[i] - i += blockDim().x + s = threadIdx().x + @inbounds while s <= supportsize + idx = support[s] + value[s] = V[idx] + 
prob[s] = gap(ambiguity_set, idx) + s += blockDim().x end # Need to synchronize to make sure all agree on the shared memory @@ -784,19 +742,17 @@ end end @inline function ff_add_gap_mul_V_sparse( - reduction_ws, value, prob, - sum_lower::Tv, + remaining::Tv, ) where {Tv} assume(warpsize() == 32) + wid, lane = fldmod1(threadIdx().x, warpsize()) + reduction_ws = CuStaticSharedArray(Tv, 32) warp_aligned_length = kernel_nextwarp(length(prob)) - @inbounds remaining = one(Tv) - sum_lower gap_value = zero(Tv) - wid, lane = fldmod1(threadIdx().x, warpsize()) - # Block-strided loop and save into register `gap_value` s = threadIdx().x @inbounds while s <= warp_aligned_length @@ -839,26 +795,7 @@ end s += blockDim().x end - # Warp-reduction - gap_value = CUDA.reduce_warp(+, gap_value) - sync_threads() - - # Block-reduction - if wid == one(Int32) - reduction_ws[lane] = zero(Tv) - end - sync_threads() - - if lane == one(Int32) - reduction_ws[wid] = gap_value - end - sync_threads() - - if wid == one(Int32) - gap_value = reduction_ws[lane] - gap_value = CUDA.reduce_warp(+, gap_value) - end - sync_threads() + gap_value = CUDA.reduce_block(+, gap_value, zero(Tv), Val(true)) return gap_value end @@ -867,36 +804,32 @@ end value::AbstractVector{Tv}, perm::AbstractVector{Int32}, V, - lower_inds, - lower_vals, - gap_inds, - gap_vals, - sum_lower::Tv, - value_lt, - wid, - lane, + ambiguity_set, + value_lt ) where {Tv} - reduction_ws = CuStaticSharedArray(Tv, 32) - - res = add_lower_mul_V_block(reduction_ws, V, lower_inds, lower_vals, wid, lane) + fi_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value, perm) - fi_sparse_initialize_sorting_shared_memory!(V, gap_inds, value, perm) - - valueⱼ = @view value[1:length(gap_inds)] - permⱼ = @view perm[1:length(gap_inds)] + valueⱼ = @view value[1:IntervalMDP.supportsize(ambiguity_set)] + permⱼ = @view perm[1:IntervalMDP.supportsize(ambiguity_set)] block_bitonic_sort!(valueⱼ, permⱼ, value_lt) - res += 
fi_add_gap_mul_V_sparse(reduction_ws, valueⱼ, permⱼ, gap_vals, sum_lower) - return res + value, remaining = add_lower_mul_V_block(V, ambiguity_set) + value += fi_add_gap_mul_V_sparse(valueⱼ, permⱼ, ambiguity_set, remaining) + + return remaining end -@inline function fi_sparse_initialize_sorting_shared_memory!(V, gapinds, value, perm) +@inline function fi_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value, perm) + support = IntervalMDP.support(ambiguity_set) + supportsize = IntervalMDP.supportsize(ambiguity_set) + # Copy into shared memory - i = threadIdx().x - @inbounds while i <= length(gapinds) - value[i] = V[gapinds[i]] - perm[i] = i - i += blockDim().x + s = threadIdx().x + @inbounds while s <= supportsize + idx = support[s] + value[s] = V[idx] + perm[s] = s + s += blockDim().x end # Need to synchronize to make sure all agree on the shared memory @@ -904,27 +837,24 @@ end end @inline function fi_add_gap_mul_V_sparse( - reduction_ws, value, perm, - gapvals, - sum_lower::Tv, + ambiguity_set, + remaining::Tv, ) where {Tv} assume(warpsize() == 32) + wid, lane = fldmod1(threadIdx().x, warpsize()) + reduction_ws = CuStaticSharedArray(Tv, 32) - warp_aligned_length = kernel_nextwarp(length(gapvals)) - @inbounds remaining = one(Tv) - sum_lower + warp_aligned_length = kernel_nextwarp(length(value)) gap_value = zero(Tv) - wid, lane = fldmod1(threadIdx().x, warpsize()) - # Block-strided loop and save into register `gap_value` s = threadIdx().x @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding gap - g = if s <= length(gapvals) - t = perm[s] - gapvals[t] + g = if s <= length(value) + gap(ambiguity_set, perm[s]) else # 0 gap is a neural element zero(Tv) @@ -938,7 +868,7 @@ end remaining += g # Update the probability - if s <= length(gapvals) + if s <= length(value) sub = clamp(remaining, zero(Tv), g) gap_value += sub * value[s] remaining -= sub @@ -961,26 +891,7 @@ end s += blockDim().x end - # Warp-reduction 
- gap_value = CUDA.reduce_warp(+, gap_value) - sync_threads() - - # Block-reduction - if wid == 1 - reduction_ws[lane] = zero(Tv) - end - sync_threads() - - if lane == 1 - reduction_ws[wid] = gap_value - end - sync_threads() - - if wid == 1 - gap_value = reduction_ws[lane] - gap_value = CUDA.reduce_warp(+, gap_value) - end - sync_threads() + gap_value = CUDA.reduce_block(+, gap_value, zero(Tv), Val(true)) return gap_value end @@ -989,35 +900,29 @@ end Vperm::AbstractVector{Int32}, Pperm::AbstractVector{Int32}, V, - lower_inds, - lower_vals, - gap_inds, - gap_vals, - sum_lower::Tv, - value_lt, - wid, - lane, -) where {Tv} - reduction_ws = CuStaticSharedArray(Tv, 32) - - res = add_lower_mul_V_block(reduction_ws, V, lower_inds, lower_vals, wid, lane) - - ii_sparse_initialize_sorting_shared_memory!(gap_inds, Vperm, Pperm) + ambiguity_set, + value_lt +) + ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) - Vpermⱼ = @view Vperm[1:length(gap_inds)] - Ppermⱼ = @view Pperm[1:length(gap_inds)] + Vpermⱼ = @view Vperm[1:IntervalMDP.supportsize(ambiguity_set)] + Ppermⱼ = @view Pperm[1:IntervalMDP.supportsize(ambiguity_set)] block_bitonic_sortperm!(V, Vpermⱼ, Ppermⱼ, value_lt) - res += ii_add_gap_mul_V_sparse(reduction_ws, V, Vpermⱼ, Ppermⱼ, gap_vals, sum_lower) + value, remaining = add_lower_mul_V_block(V, ambiguity_set) + value += ii_add_gap_mul_V_sparse(V, Vpermⱼ, Ppermⱼ, ambiguity_set, remaining) - return res + return value end -@inline function ii_sparse_initialize_sorting_shared_memory!(gapinds, Vperm, Pperm) +@inline function ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) + support = IntervalMDP.support(ambiguity_set) + supportsize = IntervalMDP.supportsize(ambiguity_set) + # Copy into shared memory i = threadIdx().x - @inbounds while i <= length(gapinds) - Vperm[i] = gapinds[i] + @inbounds while i <= supportsize + Vperm[i] = support[i] Pperm[i] = i i += blockDim().x end @@ -1027,28 +932,25 @@ end end @inline function 
ii_add_gap_mul_V_sparse( - reduction_ws, value, Vperm, Pperm, - gapvals, - sum_lower::Tv, + ambiguity_set, + remaining::Tv, ) where {Tv} assume(warpsize() == 32) + wid, lane = fldmod1(threadIdx().x, warpsize()) + reduction_ws = CuStaticSharedArray(Tv, 32) - warp_aligned_length = kernel_nextwarp(length(gapvals)) - @inbounds remaining = one(Tv) - sum_lower + warp_aligned_length = kernel_nextwarp(length(value)) gap_value = zero(Tv) - wid, lane = fldmod1(threadIdx().x, warpsize()) - # Block-strided loop and save into register `gap_value` s = threadIdx().x @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding gap - g = if s <= length(gapvals) - t = Pperm[s] - gapvals[t] + g = if s <= length(value) + gap(ambiguity_set, Pperm[s]) else # 0 gap is a neural element zero(Tv) @@ -1062,7 +964,7 @@ end remaining += g # Update the probability - if s <= length(gapvals) + if s <= length(value) sub = clamp(remaining, zero(Tv), g) gap_value += sub * value[Vperm[s]] remaining -= sub @@ -1085,26 +987,7 @@ end s += blockDim().x end - # Warp-reduction - gap_value = CUDA.reduce_warp(+, gap_value) - sync_threads() - - # Block-reduction - if wid == 1 - reduction_ws[lane] = zero(Tv) - end - sync_threads() - - if lane == 1 - reduction_ws[wid] = gap_value - end - sync_threads() - - if wid == 1 - gap_value = reduction_ws[lane] - gap_value = CUDA.reduce_warp(+, gap_value) - end - sync_threads() + gap_value = CUDA.reduce_block(+, gap_value, zero(Tv), Val(true)) return gap_value end diff --git a/ext/cuda/workspace.jl b/ext/cuda/workspace.jl index 7c3bbf09..f54d791b 100644 --- a/ext/cuda/workspace.jl +++ b/ext/cuda/workspace.jl @@ -23,7 +23,7 @@ struct CuSparseOMaxWorkspace <: AbstractCuWorkspace end function CuSparseOMaxWorkspace(p::IntervalAmbiguitySets, num_actions) - max_support = maximum(length ∘ IntervalMDP.support, p) + max_support = IntervalMDP.maxsupportsize(p) return CuSparseOMaxWorkspace(max_support, num_actions) end diff --git 
a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index a39d14bc..e1fe526b 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -198,6 +198,8 @@ source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) action_shape(::IntervalAmbiguitySets) = (1,) marginals(p::IntervalAmbiguitySets) = (p,) +maxsupportsize(p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix{R}} = maximum(supportsize, p) + function Base.getindex(p::IntervalAmbiguitySets, j::Integer) # Select by columns only! l = @view p.lower[:, j] @@ -227,7 +229,7 @@ end function showambiguitysets(io::IO, prefix, p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractSparseMatrix} println(io, prefix, styled"├─ Ambiguity set type: Interval (sparse, {code:$MR})") num_transitions = nnz(p.gap) - max_support = maximum(supportsize, p) + max_support = maxsupportsize(p) println(io, prefix, styled"└─ Transitions: {magenta: $num_transitions (max support: $max_support)}") end @@ -243,7 +245,7 @@ function Base.show(io::IO, mime::MIME"text/plain", p::IntervalAmbiguitySets{R, M println(io, styled"├─ Storage type: {code:$MR}") println(io, "├─ Number of target states: ", num_target(p)) println(io, "├─ Number of ambiguity sets: ", num_sets(p)) - println(io, "├─ Maximum support size: ", maximum(supportsize, p)) + println(io, "├─ Maximum support size: ", maxsupportsize(p)) println(io, "└─ Number of non-zeros: ", nnz(p.gap)) end diff --git a/src/workspace.jl b/src/workspace.jl index ad8f72a1..b5aba092 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -99,7 +99,7 @@ struct SparseIntervalOMaxWorkspace{T <: Real} end function SparseIntervalOMaxWorkspace(ambiguity_sets::IntervalAmbiguitySets{R}, nactions) where {R <: Real} - max_support = maximum(supportsize, ambiguity_sets) + max_support = maxsupportsize(ambiguity_sets) budget = 1 .- vec(sum(ambiguity_sets.lower; dims = 1)) scratch = Vector{Tuple{R, R}}(undef, max_support) @@ 
-190,7 +190,7 @@ function FactoredIntervalOMaxWorkspace(sys::FactoredRMDP) N = length(marginals(sys)) R = valuetype(sys) - max_support_per_marginal = Tuple(maximum(map(length ∘ support, ambiguity_sets(marginal))) for marginal in marginals(sys)) + max_support_per_marginal = Tuple(maxsupportsize(ambiguity_sets(marginal)) for marginal in marginals(sys)) max_support = maximum(max_support_per_marginal) expectation_cache = NTuple{N - 1, Vector{R}}(Vector{R}(undef, n) for n in max_support_per_marginal[2:end]) diff --git a/test/cuda/sparse/bellman.jl b/test/cuda/sparse/bellman.jl index 0f20de34..0e60c870 100644 --- a/test/cuda/sparse/bellman.jl +++ b/test/cuda/sparse/bellman.jl @@ -57,209 +57,209 @@ for N in [Float32, Float64] end end -# #### Large matrices -# @testset "large matrices" begin -# function sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) -# prob_split = 1 / nnz_per_column - -# rand_val_lower = rand(rng, Float64, nnz_per_column * m) .* prob_split -# rand_val_upper = rand(rng, Float64, nnz_per_column * m) .* prob_split .+ prob_split -# rand_index = collect(1:nnz_per_column) - -# row_vals = Vector{Int32}(undef, nnz_per_column * m) -# col_ptrs = Int32[1; collect(1:m) .* nnz_per_column .+ 1] - -# for j in 1:m -# StatsBase.seqsample_a!(rng, 1:n, rand_index) # Select nnz_per_column elements from 1:n -# sort!(rand_index) - -# row_vals[((j - 1) * nnz_per_column + 1):(j * nnz_per_column)] .= rand_index -# end - -# lower = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_lower) -# upper = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_upper) - -# prob = IntervalAmbiguitySets(; lower = lower, upper = upper) -# V = rand(rng, Float64, n) - -# cuda_prob = IntervalMDP.cu(prob) -# cuda_V = IntervalMDP.cu(V) - -# return prob, V, cuda_prob, cuda_V -# end - -# # Many columns -# @testset "many columns" begin -# rng = MersenneTwister(55392) - -# n = 100 -# m = 1000000 # It has to be greater than 8 * 2^16 to exceed maximum grid 
size -# nnz_per_column = 2 -# prob, V, cuda_prob, cuda_V = -# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) - -# ws = IntervalMDP.construct_workspace(prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(prob) -# V_cpu = zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_cpu, -# V, -# prob; -# upper_bound = false, -# ) - -# ws = IntervalMDP.construct_workspace(cuda_prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) -# V_gpu = CUDA.zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_gpu, -# cuda_V, -# cuda_prob; -# upper_bound = false, -# ) -# V_gpu = Vector(V_gpu) # Convert to CPU for testing - -# @test V_cpu ≈ V_gpu -# end - -# # Many non-zeros -# @testset "many non-zeros" begin -# rng = MersenneTwister(55392) - -# n = 100000 -# m = 10 -# nnz_per_column = 1500 # It has to be greater than 187 to fill shared memory with 8 states per block. -# prob, V, cuda_prob, cuda_V = -# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) - -# ws = IntervalMDP.construct_workspace(prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(prob) -# V_cpu = zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_cpu, -# V, -# prob; -# upper_bound = false, -# ) - -# ws = IntervalMDP.construct_workspace(cuda_prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) -# V_gpu = CUDA.zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_gpu, -# cuda_V, -# cuda_prob; -# upper_bound = false, -# ) -# V_gpu = Vector(V_gpu) # Convert to CPU for testing - -# @test V_cpu ≈ V_gpu -# end - -# # More non-zeros -# @testset "more non-zeros" begin -# rng = MersenneTwister(55392) - -# n = 100000 -# m = 10 -# nnz_per_column = 4000 # It has to be greater than 3800 to exceed shared memory for ff implementation -# prob, V, cuda_prob, cuda_V = -# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) 
- -# ws = IntervalMDP.construct_workspace(prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(prob) -# V_cpu = zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_cpu, -# V, -# prob; -# upper_bound = false, -# ) - -# ws = IntervalMDP.construct_workspace(cuda_prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) -# V_gpu = CUDA.zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_gpu, -# cuda_V, -# cuda_prob; -# upper_bound = false, -# ) -# V_gpu = Vector(V_gpu) # Convert to CPU for testing - -# @test V_cpu ≈ V_gpu -# end - -# # Most non-zeros -# @testset "most non-zeros" begin -# rng = MersenneTwister(55392) - -# n = 100000 -# m = 10 -# nnz_per_column = 6000 # It has to be greater than 5800 to exceed shared memory for fi implementation -# prob, V, cuda_prob, cuda_V = -# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) - -# ws = IntervalMDP.construct_workspace(prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(prob) -# V_cpu = zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_cpu, -# V, -# prob; -# upper_bound = false, -# ) - -# ws = IntervalMDP.construct_workspace(cuda_prob) -# strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) -# V_gpu = CUDA.zeros(Float64, m) -# IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_gpu, -# cuda_V, -# cuda_prob; -# upper_bound = false, -# ) -# V_gpu = Vector(V_gpu) # Convert to CPU for testing - -# @test V_cpu ≈ V_gpu -# end - -# # Too many non-zeros -# @testset "too many non-zeros" begin -# rng = MersenneTwister(55392) - -# n = 100000 -# m = 10 -# nnz_per_column = 8000 # It has to be greater than 7800 to exceed shared memory for ii implementation -# prob, V, cuda_prob, cuda_V = -# sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) - -# ws = IntervalMDP.construct_workspace(cuda_prob) -# strategy_cache = 
IntervalMDP.construct_strategy_cache(cuda_prob) -# V_gpu = CUDA.zeros(Float64, m) -# @test_throws IntervalMDP.OutOfSharedMemory IntervalMDP._bellman_helper!( -# ws, -# strategy_cache, -# V_gpu, -# cuda_V, -# cuda_prob; -# upper_bound = false, -# ) -# end -# end +#### Large matrices +@testset "large matrices" begin + function sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + prob_split = 1 / nnz_per_column + + rand_val_lower = rand(rng, Float64, nnz_per_column * m) .* prob_split + rand_val_upper = rand(rng, Float64, nnz_per_column * m) .* prob_split .+ prob_split + rand_index = collect(1:nnz_per_column) + + row_vals = Vector{Int32}(undef, nnz_per_column * m) + col_ptrs = Int32[1; collect(1:m) .* nnz_per_column .+ 1] + + for j in 1:m + StatsBase.seqsample_a!(rng, 1:n, rand_index) # Select nnz_per_column elements from 1:n + sort!(rand_index) + + row_vals[((j - 1) * nnz_per_column + 1):(j * nnz_per_column)] .= rand_index + end + + lower = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_lower) + upper = SparseMatrixCSC{Float64, Int32}(n, m, col_ptrs, row_vals, rand_val_upper) + + prob = IntervalAmbiguitySets(; lower = lower, upper = upper) + V = rand(rng, Float64, n) + + cuda_prob = IntervalMDP.cu(prob) + cuda_V = IntervalMDP.cu(V) + + return prob, V, cuda_prob, cuda_V + end + + # Many columns + @testset "many columns" begin + rng = MersenneTwister(55392) + + n = 100 + m = 5000000 # It has to be greater than 32 * 2^16 = 2^21 to exceed maximum grid size + nnz_per_column = 10 + prob, V, cuda_prob, cuda_V = + sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + + ws = IntervalMDP.construct_workspace(prob) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + V_cpu = zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_cpu, + V, + prob; + upper_bound = false, + ) + + ws = IntervalMDP.construct_workspace(cuda_prob) + strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) + V_gpu = 
CUDA.zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_gpu, + cuda_V, + cuda_prob; + upper_bound = false, + ) + V_gpu = Vector(V_gpu) # Convert to CPU for testing + + @test V_cpu ≈ V_gpu + end + + # Many non-zeros + @testset "many non-zeros" begin + rng = MersenneTwister(55392) + + n = 100000 + m = 10 + nnz_per_column = 1500 # It has to be greater than 187 to fill shared memory with up to 32 states per block. + prob, V, cuda_prob, cuda_V = + sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + + ws = IntervalMDP.construct_workspace(prob) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + V_cpu = zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_cpu, + V, + prob; + upper_bound = false, + ) + + ws = IntervalMDP.construct_workspace(cuda_prob) + strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) + V_gpu = CUDA.zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_gpu, + cuda_V, + cuda_prob; + upper_bound = false, + ) + V_gpu = Vector(V_gpu) # Convert to CPU for testing + + @test V_cpu ≈ V_gpu + end + + # More non-zeros + @testset "more non-zeros" begin + rng = MersenneTwister(55392) + + n = 100000 + m = 10 + nnz_per_column = 4000 # It has to be greater than 3800 to exceed shared memory for ff implementation + prob, V, cuda_prob, cuda_V = + sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + + ws = IntervalMDP.construct_workspace(prob) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + V_cpu = zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_cpu, + V, + prob; + upper_bound = false, + ) + + ws = IntervalMDP.construct_workspace(cuda_prob) + strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) + V_gpu = CUDA.zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_gpu, + cuda_V, + cuda_prob; + upper_bound = false, + ) + V_gpu = Vector(V_gpu) # Convert to CPU for 
testing + + @test V_cpu ≈ V_gpu + end + + # Most non-zeros + @testset "most non-zeros" begin + rng = MersenneTwister(55392) + + n = 100000 + m = 10 + nnz_per_column = 6000 # It has to be greater than 5800 to exceed shared memory for fi implementation + prob, V, cuda_prob, cuda_V = + sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + + ws = IntervalMDP.construct_workspace(prob) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + V_cpu = zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_cpu, + V, + prob; + upper_bound = false, + ) + + ws = IntervalMDP.construct_workspace(cuda_prob) + strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) + V_gpu = CUDA.zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_gpu, + cuda_V, + cuda_prob; + upper_bound = false, + ) + V_gpu = Vector(V_gpu) # Convert to CPU for testing + + @test V_cpu ≈ V_gpu + end + + # Too many non-zeros + @testset "too many non-zeros" begin + rng = MersenneTwister(55392) + + n = 100000 + m = 10 + nnz_per_column = 8000 # It has to be greater than 7800 to exceed shared memory for ii implementation + prob, V, cuda_prob, cuda_V = + sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + + ws = IntervalMDP.construct_workspace(cuda_prob) + strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) + V_gpu = CUDA.zeros(Float64, m) + @test_throws IntervalMDP.OutOfSharedMemory IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_gpu, + cuda_V, + cuda_prob; + upper_bound = false, + ) + end +end From 92dd8f13f79dc41af1c20feddb8720d8da459c50 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Sun, 28 Sep 2025 22:17:15 +0200 Subject: [PATCH 51/71] Fix CuSparse O-max kernel --- ext/cuda/bellman/sparse.jl | 69 +- ext/cuda/sorting.jl | 8 +- ext/cuda/utils.jl | 8 + test/cuda/cuda.jl | 10 +- test/cuda/dense/bellman.jl | 12 +- test/cuda/dense/vi.jl | 1 - test/cuda/sparse/bellman.jl | 10 +- 
test/cuda/sparse/imdp.jl | 1113 +++++++++++++++++++-------------- test/cuda/sparse/synthesis.jl | 91 ++- test/cuda/sparse/vi.jl | 300 +++++---- 10 files changed, 973 insertions(+), 649 deletions(-) diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index b8c87d02..7c7089d5 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -56,7 +56,7 @@ function IntervalMDP._bellman_helper!( # Try if we can fit permutation indices into shared memory (50% less memory relative to (Tv, Tv)) if try_large_sparse_bellman!( Int32, - Int32, + Nothing, workspace, strategy_cache, Vres, @@ -68,7 +68,7 @@ function IntervalMDP._bellman_helper!( return Vres end - throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * 2 * sizeof(Int32))) + throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * sizeof(Int32))) end function try_small_sparse_bellman!( @@ -563,6 +563,21 @@ end return value_ws, gap_ws end +@inline function initialize_large_sparse_value_and_gap( + ::Type{T1}, + ::Type{Nothing}, + workspace, + ::OptimizingActiveCache, + V, + marginal +) where {T1} + Tv = IntervalMDP.valuetype(marginal) + + value_ws = CuDynamicSharedArray(T1, workspace.max_support, workspace.num_actions * sizeof(Tv)) + + return value_ws, nothing +end + @inline function initialize_large_sparse_value_and_gap( ::Type{T1}, ::Type{T2}, @@ -577,6 +592,19 @@ end return value_ws, gap_ws end +@inline function initialize_large_sparse_value_and_gap( + ::Type{T1}, + ::Type{Nothing}, + workspace, + ::NonOptimizingActiveCache, + V, + marginal +) where {T1} + value_ws = CuDynamicSharedArray(T1, workspace.max_support) + + return value_ws, nothing +end + @inline function state_sparse_omaximization!( action_workspace, value_ws, @@ -816,7 +844,7 @@ end value, remaining = add_lower_mul_V_block(V, ambiguity_set) value += fi_add_gap_mul_V_sparse(valueⱼ, permⱼ, ambiguity_set, remaining) - return remaining + return value end @inline function fi_sparse_initialize_sorting_shared_memory!(V, 
ambiguity_set, value, perm) @@ -828,7 +856,7 @@ end @inbounds while s <= supportsize idx = support[s] value[s] = V[idx] - perm[s] = s + perm[s] = idx s += blockDim().x end @@ -897,33 +925,31 @@ end end @inline function state_action_sparse_omaximization!( - Vperm::AbstractVector{Int32}, - Pperm::AbstractVector{Int32}, + perm::AbstractVector{Int32}, + ::Nothing, V, ambiguity_set, value_lt ) - ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) + i_sparse_initialize_sorting_shared_memory!(ambiguity_set, perm) - Vpermⱼ = @view Vperm[1:IntervalMDP.supportsize(ambiguity_set)] - Ppermⱼ = @view Pperm[1:IntervalMDP.supportsize(ambiguity_set)] - block_bitonic_sortperm!(V, Vpermⱼ, Ppermⱼ, value_lt) + perm = @view perm[1:IntervalMDP.supportsize(ambiguity_set)] + block_bitonic_sortperm!(V, perm, nothing, value_lt) value, remaining = add_lower_mul_V_block(V, ambiguity_set) - value += ii_add_gap_mul_V_sparse(V, Vpermⱼ, Ppermⱼ, ambiguity_set, remaining) + value += i_add_gap_mul_V_sparse(V, perm, ambiguity_set, remaining) return value end -@inline function ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) +@inline function i_sparse_initialize_sorting_shared_memory!(ambiguity_set, perm) support = IntervalMDP.support(ambiguity_set) supportsize = IntervalMDP.supportsize(ambiguity_set) # Copy into shared memory i = threadIdx().x @inbounds while i <= supportsize - Vperm[i] = support[i] - Pperm[i] = i + perm[i] = support[i] i += blockDim().x end @@ -931,10 +957,9 @@ end sync_threads() end -@inline function ii_add_gap_mul_V_sparse( +@inline function i_add_gap_mul_V_sparse( value, - Vperm, - Pperm, + perm, ambiguity_set, remaining::Tv, ) where {Tv} @@ -942,15 +967,15 @@ end wid, lane = fldmod1(threadIdx().x, warpsize()) reduction_ws = CuStaticSharedArray(Tv, 32) - warp_aligned_length = kernel_nextwarp(length(value)) + warp_aligned_length = kernel_nextwarp(length(perm)) gap_value = zero(Tv) # Block-strided loop and save into register `gap_value` s 
= threadIdx().x @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding gap - g = if s <= length(value) - gap(ambiguity_set, Pperm[s]) + g = if s <= length(perm) + gap(ambiguity_set, perm[s]) else # 0 gap is a neural element zero(Tv) @@ -964,9 +989,9 @@ end remaining += g # Update the probability - if s <= length(value) + if s <= length(perm) sub = clamp(remaining, zero(Tv), g) - gap_value += sub * value[Vperm[s]] + gap_value += sub * value[perm[s]] remaining -= sub end diff --git a/ext/cuda/sorting.jl b/ext/cuda/sorting.jl index 47017b7a..52261775 100644 --- a/ext/cuda/sorting.jl +++ b/ext/cuda/sorting.jl @@ -30,8 +30,8 @@ end @inbounds while i <= length(value) if l <= length(value) && !lt(value[i], value[l]) - value[i], value[l] = value[l], value[i] - aux[i], aux[l] = aux[l], aux[i] + swapelem(value, i, l) + swapelem(aux, i, l) end thread += blockDim().x @@ -81,8 +81,8 @@ end @inbounds while i <= length(perm) if l <= length(perm) && !lt(value[perm[i]], value[perm[l]]) - perm[i], perm[l] = perm[l], perm[i] - aux[i], aux[l] = aux[l], aux[i] + swapelem(perm, i, l) + swapelem(aux, i, l) end thread += blockDim().x diff --git a/ext/cuda/utils.jl b/ext/cuda/utils.jl index e6014d99..51a5ee40 100644 --- a/ext/cuda/utils.jl +++ b/ext/cuda/utils.jl @@ -66,6 +66,14 @@ end end end +@inline function swapelem(A::AbstractArray, i, j) + @inbounds A[i], A[j] = A[j], A[i] +end + +@inline function swapelem(A::Nothing, i, j) + # Do nothing +end + @inline function selectotherdims(A::AbstractArray, dim, idxs) head, tail = idxs[1:(dim - 1)], idxs[dim:end] diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index 38e7a8f6..f809cce9 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -18,11 +18,11 @@ test_files = [ "dense/bellman.jl", "dense/vi.jl", "dense/imdp.jl", - # "dense/synthesis.jl", - # "sparse/bellman.jl", - # "sparse/vi.jl", - # "sparse/imdp.jl", - # "sparse/synthesis.jl", + "dense/synthesis.jl", + "sparse/bellman.jl", + 
"sparse/vi.jl", + "sparse/imdp.jl", + "sparse/synthesis.jl", ] if CUDA.functional() diff --git a/test/cuda/dense/bellman.jl b/test/cuda/dense/bellman.jl index a57d0f06..91663a05 100644 --- a/test/cuda/dense/bellman.jl +++ b/test/cuda/dense/bellman.jl @@ -3,8 +3,16 @@ using IntervalMDP, CUDA @testset for N in [Float32, Float64] prob = IntervalAmbiguitySets(; - lower = N[0 1//2; 1//10 3//10; 2//10 1//10], - upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10], + lower = N[ + 0 1//2 + 1//10 3//10 + 2//10 1//10 + ], + upper = N[ + 5//10 7//10 + 6//10 5//10 + 7//10 3//10 + ], ) prob = IntervalMDP.cu(prob) diff --git a/test/cuda/dense/vi.jl b/test/cuda/dense/vi.jl index 11c4e8ee..5a23a7b5 100644 --- a/test/cuda/dense/vi.jl +++ b/test/cuda/dense/vi.jl @@ -1,7 +1,6 @@ using Revise, Test using IntervalMDP, CUDA - @testset for N in [Float32, Float64] prob = IntervalAmbiguitySets(; lower = N[ diff --git a/test/cuda/sparse/bellman.jl b/test/cuda/sparse/bellman.jl index 0e60c870..cc4d0abf 100644 --- a/test/cuda/sparse/bellman.jl +++ b/test/cuda/sparse/bellman.jl @@ -121,7 +121,7 @@ end cuda_prob; upper_bound = false, ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing + V_gpu = IntervalMDP.cpu(V_gpu) # Convert to CPU for testing @test V_cpu ≈ V_gpu end @@ -159,7 +159,7 @@ end cuda_prob; upper_bound = false, ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing + V_gpu = IntervalMDP.cpu(V_gpu) # Convert to CPU for testing @test V_cpu ≈ V_gpu end @@ -197,7 +197,7 @@ end cuda_prob; upper_bound = false, ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing + V_gpu = IntervalMDP.cpu(V_gpu) # Convert to CPU for testing @test V_cpu ≈ V_gpu end @@ -235,7 +235,7 @@ end cuda_prob; upper_bound = false, ) - V_gpu = Vector(V_gpu) # Convert to CPU for testing + V_gpu = IntervalMDP.cpu(V_gpu) # Convert to CPU for testing @test V_cpu ≈ V_gpu end @@ -246,7 +246,7 @@ end n = 100000 m = 10 - nnz_per_column = 8000 # It has to be greater than 7800 to exceed shared memory for ii implementation + 
nnz_per_column = 16000 # It has to be greater than 15600 to exceed shared memory for i implementation prob, V, cuda_prob, cuda_V = sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) diff --git a/test/cuda/sparse/imdp.jl b/test/cuda/sparse/imdp.jl index c2596058..c9b6e1e0 100644 --- a/test/cuda/sparse/imdp.jl +++ b/test/cuda/sparse/imdp.jl @@ -1,499 +1,664 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA - -prob1 = IntervalProbabilities(; - lower = sparse([ - 0.0 0.5 - 0.1 0.3 - 0.2 0.1 - ]), - upper = sparse([ - 0.5 0.7 - 0.6 0.5 - 0.7 0.3 - ]), -) - -prob2 = IntervalProbabilities(; - lower = sparse([ - 0.1 0.2 - 0.2 0.3 - 0.3 0.4 - ]), - upper = sparse([ - 0.6 0.6 - 0.5 0.5 - 0.4 0.4 - ]), -) - -prob3 = IntervalProbabilities(; - lower = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), - upper = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), -) - -transition_probs = [prob1, prob2, prob3] -istates = [Int32(1)] - -mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs, istates)) -@test Vector(initial_states(mdp)) == istates - -mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) - -@testset "explicit sink state" begin - transition_prob, _ = IntervalMDP.interval_prob_hcat(transition_probs) - @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) - - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = 
VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) +using IntervalMDP, CUDA + +@testset for N in [Float32, Float64] + prob1 = IntervalAmbiguitySets(; + lower = sparse(N[ + 0 1//2 + 1//10 3//10 + 1//5 1//10 + ]), + upper = sparse(N[ + 1//2 7//10 + 3//5 1//2 + 7//10 3//10 + ]), + ) + + prob2 = IntervalAmbiguitySets(; + lower = sparse(N[ + 1//10 1//5 + 1//5 3//10 + 3//10 2//5 + ]), + upper = sparse(N[ + 3//5 3//5 + 1//2 1//2 + 2//5 2//5 + ]), + ) + + prob3 = IntervalAmbiguitySets(; + lower = sparse(N[ + 0 0 + 0 0 + 1 1 + ]), + upper = sparse(N[ + 0 0 + 0 0 + 1 1 + ]) + ) + + transition_probs = [prob1, prob2, prob3] + istates = [1] + + mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs, istates)) + @test IntervalMDP.cpu(initial_states(mdp)) == istates + + mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) + + @testset "bellman" begin + V = IntervalMDP.cu(N[1, 2, 3]) + Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + + Vres = IntervalMDP.cu(similar(Vres)) + IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing + @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] end - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - 
problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - V_conv = Vector(V_conv) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) + @testset "explicit sink state" begin + transition_prob = IntervalMDP.interval_prob_hcat(transition_probs) + @test_throws DimensionMismatch IntervalMarkovChain(transition_prob) + + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[3] == N(1) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + end + + # Infinite time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test V_conv[3] == N(1) + end + + # Exact time reachability + @testset "exact time reachability" 
begin + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + # Compare exact time to finite time + prop = ExactTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == 
N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[3] == N(1) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[3] == N(1) + @test V_fixed_it2[2] == N(0) + end + + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + @test V_conv[3] == N(1) + @test V_conv[2] == N(0) + end + + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test all(V_fixed_it1 .<= N(1)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + 
@test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + @test V_fixed_it1[2] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + @test V_fixed_it2[2] == N(0) + + # Compare exact time to finite time + prop = ExactTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Finite time reward + @testset "finite time reward" begin + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, 0], N(9//10), 10)) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it1, k, _ = solve(problem) + V_fixed_it1 = IntervalMDP.cpu(V_fixed_it1) + @test k == 10 + @test all(V_fixed_it1 .>= N(0)) + + 
spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_fixed_it2, k, _ = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) + @test k == 10 + @test all(V_fixed_it1 .<= V_fixed_it2) + end + + # Infinite time reward + @testset "infinite time reward" begin + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + end + + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + V_conv1 = IntervalMDP.cpu(V_conv1) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + V_conv2 = IntervalMDP.cpu(V_conv2) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv1, _, u = solve(problem) + V_conv1 = IntervalMDP.cpu(V_conv1) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .>= N(0)) + @test V_conv1[3] == N(0) + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V_conv2, _, u = solve(problem) + V_conv2 = IntervalMDP.cpu(V_conv2) + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv1 .<= V_conv2) + @test V_conv2[3] == N(0) + end end - # Finite time reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, 
spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end + @testset "implicit sink state" begin + transition_probs = [prob1, prob2] + implicit_mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - V_conv = Vector(V_conv) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - end + # Finite time reachability + @testset "finite time reachability" begin + prop = FiniteTimeReachability([3], 10) - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 10) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Maximize) - problem = 
VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it1, k, _ = solve(problem) - V_fixed_it1 = Vector(V_fixed_it1) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .>= 0.0) - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_fixed_it2, k, _ = solve(problem) - V_fixed_it2 = Vector(V_fixed_it2) # Convert to CPU for testing - @test k == 10 - @test all(V_fixed_it1 .<= V_fixed_it2) - end + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv, _, u = solve(problem) - V_conv = Vector(V_conv) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv .>= 0.0) - end + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - # Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - V_conv1 = Vector(V_conv1) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - V_conv2 = Vector(V_conv2) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - - spec = Specification(prop, 
Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv1, _, u = solve(problem) - V_conv1 = Vector(V_conv1) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .>= 0.0) - @test V_conv1[3] == 0.0 - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V_conv2, _, u = solve(problem) - V_conv2 = Vector(V_conv2) # Convert to CPU for testing - @test maximum(u) <= 1e-6 - @test all(V_conv1 .<= V_conv2) - @test V_conv2[3] == 0.0 - end -end - -@testset "implicit sink state" begin - transition_probs = [prob1, prob2] - implicit_mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs)) - - # Finite time reachability - @testset "finite time reachability" begin - prop = FiniteTimeReachability([3], 10) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - 
V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 - # Infinite time reachability - @testset "infinite time reachability" begin - prop = InfiniteTimeReachability([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # 
Convert to CPU for testing + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - # Finite time reach avoid - @testset "finite time reach/avoid" begin - prop = FiniteTimeReachAvoid([3], [2], 10) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # 
Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - # Infinite time reach avoid - @testset "infinite time reach/avoid" begin - prop = InfiniteTimeReachAvoid([3], [2], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) 
atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end - # Finite time reward - @testset "finite time reward" begin - prop = FiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 10) - - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Pessimistic, Minimize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - - spec = Specification(prop, Optimistic, Minimize) - problem = VerificationProblem(mdp, spec) - 
V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + # Infinite time reachability + @testset "infinite time reachability" begin + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - # Infinite time reward - @testset "infinite time reward" begin - prop = InfiniteTimeReward(IntervalMDP.cu([2.0, 1.0, 0.0]), 0.9, 1e-6) - spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing - - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing - - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit - end + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Exact time reachability + @testset "exact time reachability" begin + prop = ExactTimeReachability([3], 10) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + 
V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Finite time reach avoid + @testset "finite time reach/avoid" begin + prop = FiniteTimeReachAvoid([3], [2], 10) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ 
IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Infinite time reach avoid + @testset "infinite time reach/avoid" begin + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) + + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Exact time reach avoid + @testset "exact time reach/avoid" begin + prop = ExactTimeReachAvoid([3], [2], 10) + + spec = Specification(prop, Pessimistic, 
Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Finite time reward + @testset "finite time reward" begin + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, 0], N(9//10), 10)) + + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = 
solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Pessimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + + spec = Specification(prop, Optimistic, Minimize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test k == k_implicit + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end + + # Infinite time reward + @testset "infinite time reward" begin + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) + + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) + + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end - # 
Expected exit time - @testset "expected exit time" begin - prop = ExpectedExitTime([3], 1e-6) - spec = Specification(prop, Pessimistic, Maximize) + # Expected exit time + @testset "expected exit time" begin + prop = ExpectedExitTime([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic, Maximize) - problem = VerificationProblem(mdp, spec) - V, k, res = solve(problem) - V = Vector(V) # Convert to CPU for testing - res = Vector(res) # Convert to CPU for testing + problem = VerificationProblem(mdp, spec) + V, k, res = solve(problem) - problem_implicit = VerificationProblem(implicit_mdp, spec) - V_implicit, k_implicit, res_implicit = solve(problem_implicit) - V_implicit = Vector(V_implicit) # Convert to CPU for testing - res_implicit = Vector(res_implicit) # Convert to CPU for testing + problem_implicit = VerificationProblem(implicit_mdp, spec) + V_implicit, k_implicit, res_implicit = solve(problem_implicit) - @test V ≈ V_implicit - @test k == k_implicit - @test res ≈ res_implicit + @test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_implicit) atol=1e-5 + @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 + end end -end +end \ No newline at end of file diff --git a/test/cuda/sparse/synthesis.jl b/test/cuda/sparse/synthesis.jl index 4bc605d9..29ff45c5 100644 --- a/test/cuda/sparse/synthesis.jl +++ b/test/cuda/sparse/synthesis.jl @@ -1,7 +1,8 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA +using IntervalMDP, CUDA, SparseArrays -prob1 = IntervalProbabilities(; + +prob1 = IntervalAmbiguitySets(; lower = sparse([ 0.0 0.5 0.1 0.3 @@ -14,7 +15,7 @@ prob1 = IntervalProbabilities(; ]), ) -prob2 = IntervalProbabilities(; +prob2 = IntervalAmbiguitySets(; lower = sparse([ 0.1 0.2 0.2 0.3 @@ -27,59 +28,67 @@ prob2 = IntervalProbabilities(; ]), ) -prob3 = IntervalProbabilities(; +prob3 = IntervalAmbiguitySets(; lower = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ]), upper = sparse([ - 0.0 - 0.0 - 1.0 - ][:, :]), + 0.0 0.0 
+ 0.0 0.0 + 1.0 1.0 + ]) ) transition_probs = [prob1, prob2, prob3] istates = [Int32(1)] -mdp = IntervalMarkovDecisionProcess(transition_probs, istates) -mdp = IntervalMDP.cu(mdp) +mdp = IntervalMDP.cu(IntervalMarkovDecisionProcess(transition_probs, istates)) # Finite time reachability prop = FiniteTimeReachability([3], 10) spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) -policy, V, k, res = solve(problem) +sol = solve(problem) +policy, V, k, res = sol + +@test strategy(sol) == policy +@test value_function(sol) == V +@test num_iterations(sol) == k +@test residual(sol) == res + +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test Vector(policy[k]) == [1, 2, 1] + @test policy[k] == [(1,), (2,), (1,)] end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP -problem = VerificationProblem(mdp, spec, policy) +problem = VerificationProblem(mdp, spec, IntervalMDP.cu(policy)) V_mc, k, res = solve(problem) @test V ≈ V_mc # Finite time reward -prop = FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10) -prop = IntervalMDP.cu(prop) +prop = IntervalMDP.cu(FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10)) spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) - policy, V, k, res = solve(problem) +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing + +@test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) - @test Vector(policy[k]) == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP -problem = VerificationProblem(mdp, spec, policy) +problem = VerificationProblem(mdp, spec, IntervalMDP.cu(policy)) V_mc, k, res = solve(problem) -@test V ≈ V_mc 
+@test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_mc) atol=1e-5 # Infinite time reachability prop = InfiniteTimeReachability([3], 1e-6) @@ -87,13 +96,15 @@ spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing + @test policy isa StationaryStrategy -@test IntervalMDP.cpu(policy)[1] == [1, 2, 1] +@test policy[1] == [(1,), (2,), (1,)] # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP -problem = VerificationProblem(mdp, spec, policy) +problem = VerificationProblem(mdp, spec, IntervalMDP.cu(policy)) V_mc, k, res = solve(problem) -@test V ≈ V_mc +@test IntervalMDP.cpu(V) ≈ IntervalMDP.cpu(V_mc) atol=1e-5 # Finite time safety prop = FiniteTimeSafety([3], 10) @@ -101,14 +112,36 @@ spec = Specification(prop, Pessimistic, Maximize) problem = ControlSynthesisProblem(mdp, spec) policy, V, k, res = solve(problem) +policy = IntervalMDP.cpu(policy) # Convert to CPU for testing +V = IntervalMDP.cpu(V) # Convert to CPU for testing + @test all(V .>= 0.0) -@test CUDA.@allowscalar(V[3]) ≈ 0.0 +@test V[3] ≈ 0.0 @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:(time_length(policy) - 1) - @test Vector(policy[k]) == [2, 2, 1] + @test policy[k] == [(2,), (2,), (1,)] end # The last time step (aka. the first value iteration step) has a different strategy. 
-@test Vector(policy[time_length(policy)]) == [2, 1, 1] +@test policy[time_length(policy)] == [(2,), (1,), (1,)] + +@testset "implicit sink state" begin + transition_probs = [prob1, prob2] + mdp = IntervalMarkovDecisionProcess(transition_probs) + + # Finite time reachability + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(mdp, spec) + policy, V, k, res = solve(problem) + + policy = IntervalMDP.cpu(policy) # Convert to CPU for testing + + @test policy isa TimeVaryingStrategy + @test time_length(policy) == 10 + for k in 1:time_length(policy) + @test policy[k] == [(1,), (2,)] + end +end diff --git a/test/cuda/sparse/vi.jl b/test/cuda/sparse/vi.jl index 47232128..1e663e1e 100644 --- a/test/cuda/sparse/vi.jl +++ b/test/cuda/sparse/vi.jl @@ -1,108 +1,194 @@ using Revise, Test -using IntervalMDP, SparseArrays, CUDA - -prob = IntervalProbabilities(; - lower = sparse_hcat( - SparseVector(3, [2, 3], [0.1, 0.2]), - SparseVector(3, [1, 2, 3], [0.5, 0.3, 0.1]), - SparseVector(3, [3], [1.0]), - ), - upper = sparse_hcat( - SparseVector(3, [1, 2, 3], [0.5, 0.6, 0.7]), - SparseVector(3, [1, 2, 3], [0.7, 0.5, 0.3]), - SparseVector(3, [3], [1.0]), - ), -) - -mc = IntervalMDP.cu(IntervalMarkovChain(prob, [1])) -prop = FiniteTimeReachability([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReachability([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachability([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeReachAvoid([3], [2], 10) 
-spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeReachAvoid([3], [2], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it .<= V_fixed_it2) - -prop = InfiniteTimeReachAvoid([3], [2], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = FiniteTimeSafety([3], 10) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = FiniteTimeSafety([3], 11) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it2, k, _ = solve(problem) -@test k == 11 -@test all(V_fixed_it2 .>= 0.0) -@test all(V_fixed_it2 .<= V_fixed_it) - -prop = InfiniteTimeSafety([3], 1e-6) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = IntervalMDP.cu(FiniteTimeReward([2.0, 1.0, 0.0], 0.9, 10)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 -@test all(V_fixed_it .>= 0.0) - -prop = IntervalMDP.cu(FiniteTimeReward([2.0, 1.0, -1.0], 0.9, 10)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_fixed_it, k, _ = solve(problem) -@test k == 10 - -prop = IntervalMDP.cu(InfiniteTimeReward([2.0, 1.0, 0.0], 0.9, 1e-6)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 -@test all(V_conv .>= 0.0) - -prop = IntervalMDP.cu(InfiniteTimeReward([2.0, 1.0, 
-1.0], 0.9, 1e-6)) -spec = Specification(prop, Pessimistic) -problem = VerificationProblem(mc, spec) -V_conv, _, u = solve(problem) -@test maximum(u) <= 1e-6 +using IntervalMDP, CUDA, SparseArrays + +@testset for N in [Float32, Float64] + prob = IntervalAmbiguitySets(; + lower = sparse(N[ + 0 1//2 0 + 1//10 3//10 0 + 1//5 1//10 1 + ]), + upper = sparse(N[ + 1//2 7//10 0 + 3//5 1//2 0 + 7//10 3//10 1 + ]), + ) + + mc = IntervalMDP.cu(IntervalMarkovChain(prob, [1])) + @test IntervalMDP.cpu(initial_states(mc)) == [1] + + mc = IntervalMDP.cu(IntervalMarkovChain(prob)) + mc_cpu = IntervalMarkovChain(prob) # For comparison + + prop = FiniteTimeReachability([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + sol = solve(problem) + V_fixed_it, k, res = sol + + @test value_function(sol) == V_fixed_it + @test num_iterations(sol) == k + @test residual(sol) == res + + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = FiniteTimeReachability([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 11 + @test all(V_fixed_it .<= V_fixed_it2) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = InfiniteTimeReachability([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # 
Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeReachAvoid([3], [2], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, res = solve(problem) + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = FiniteTimeReachAvoid([3], [2], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it .<= V_fixed_it2) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = InfiniteTimeReachAvoid([3], [2], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = FiniteTimeSafety([3], 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, res = solve(problem) + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + @test all(V_fixed_it .<= N(1)) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == 
k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = FiniteTimeSafety([3], 11) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 11 + @test all(V_fixed_it2 .>= N(0)) + @test all(V_fixed_it2 .<= N(1)) + @test all(V_fixed_it2 .<= V_fixed_it) + + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = InfiniteTimeSafety([3], N(1//1_000_000)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + @test all(V_conv .<= N(1)) + + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, 0], N(9//10), 10)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it, k, res = solve(problem) + V_fixed_it = IntervalMDP.cpu(V_fixed_it) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it .>= N(0)) + + prop = FiniteTimeReward(N[2, 1, 0], N(9//10), 10) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = IntervalMDP.cu(FiniteTimeReward(N[2, 1, -1], N(9//10), 10)) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_fixed_it2, k, res = solve(problem) + V_fixed_it2 = IntervalMDP.cpu(V_fixed_it2) # Convert to CPU for testing + @test k == 10 + @test all(V_fixed_it2 .<= V_fixed_it) + + prop = FiniteTimeReward(N[2, 1, -1], N(9//10), 10) + 
spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc_cpu, spec) + V_fixed_it_cpu, k_cpu, res_cpu = solve(problem) + @test k == k_cpu + @test V_fixed_it2 ≈ V_fixed_it_cpu atol=1e-5 + @test IntervalMDP.cpu(res) ≈ res_cpu atol=1e-5 + + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, 0], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) + @test all(V_conv .>= N(0)) + + prop = IntervalMDP.cu(InfiniteTimeReward(N[2, 1, -1], N(9//10), N(1//1_000_000))) + spec = Specification(prop, Pessimistic) + problem = VerificationProblem(mc, spec) + V_conv, _, u = solve(problem) + V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing + @test maximum(u) <= N(1//1_000_000) +end \ No newline at end of file From 2db8ac7203ca54264b2ca2b4c37f40007ac87159 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 29 Sep 2025 15:42:49 +0200 Subject: [PATCH 52/71] Re-add bellman documentation --- src/bellman.jl | 152 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/src/bellman.jl b/src/bellman.jl index 9aa356b6..eac8348f 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -1,9 +1,161 @@ +""" + bellman(V, model; upper_bound = false, maximize = true) + +Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref), +that upper or lower bounds the expectation of the value function `V`. +Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument. +That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower +bound is computed. 
+ +### Examples +```jldoctest +using IntervalMDP + +prob1 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.5 + 0.1 0.3 + 0.2 0.1 + ], + upper = [ + 0.5 0.7 + 0.6 0.5 + 0.7 0.3 + ], +) + +prob2 = IntervalAmbiguitySets(; + lower = [ + 0.1 0.2 + 0.2 0.3 + 0.3 0.4 + ], + upper = [ + 0.6 0.6 + 0.5 0.5 + 0.4 0.4 + ], +) + +prob3 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ], + upper = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ] +) + +transition_probs = [prob1, prob2, prob3] +istates = [Int32(1)] + +model = IntervalMarkovDecisionProcess(transition_probs, istates) + +Vprev = [1.0, 2.0, 3.0] +Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) + +# output + +3-element Vector{Float64}: + 1.7 + 2.1 + 3.0 +``` + +!!! note + This function will construct a workspace object, a strategy cache, and an output vector. + For a hot-loop, it is more efficient to use `bellman!` and pass in pre-allocated objects. + +""" function bellman(V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) Vres = similar(V, source_shape(model)) return bellman!(Vres, V, model, alg; upper_bound = upper_bound, maximize = maximize) end +""" + bellman!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true) + +Compute in-place robust Bellman update with the value function `V` and the model `model`, +e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V`. +Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument. +That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower +bound is computed. + +The output is constructed in the input `Vres` and returned. The workspace object is also modified, +and depending on the type, the strategy cache may be modified as well. 
See `construct_workspace` +and `construct_strategy_cache` for more details on how to pre-allocate the workspace and strategy cache. + +### Examples + +```jldoctest +using IntervalMDP + +prob1 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.5 + 0.1 0.3 + 0.2 0.1 + ], + upper = [ + 0.5 0.7 + 0.6 0.5 + 0.7 0.3 + ], +) + +prob2 = IntervalAmbiguitySets(; + lower = [ + 0.1 0.2 + 0.2 0.3 + 0.3 0.4 + ], + upper = [ + 0.6 0.6 + 0.5 0.5 + 0.4 0.4 + ], +) + +prob3 = IntervalAmbiguitySets(; + lower = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ], + upper = [ + 0.0 0.0 + 0.0 0.0 + 1.0 1.0 + ] +) + +transition_probs = [prob1, prob2, prob3] +istates = [Int32(1)] + +model = IntervalMarkovDecisionProcess(transition_probs, istates) + +Vprev = [1.0, 2.0, 3.0] +workspace = IntervalMDP.construct_workspace(model) +strategy_cache = IntervalMDP.construct_strategy_cache(model) +Vcur = similar(Vprev) + +IntervalMDP.bellman!(workspace, strategy_cache, Vcur, Vprev, model; upper_bound = false, maximize = true) + +# output + +3-element Vector{Float64}: + 1.7 + 2.1 + 3.0 +``` +""" +function bellman! 
end + function bellman!(Vres, V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) workspace = construct_workspace(model, alg) strategy_cache = construct_strategy_cache(model) From c6405ae32134309803370889911e6d2a2e77a143 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 29 Sep 2025 17:53:08 +0200 Subject: [PATCH 53/71] Update property docstrings --- src/specification.jl | 50 ++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/src/specification.jl b/src/specification.jl index e5a8b760..a81caf27 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -224,10 +224,10 @@ postprocess_value_function!(value_function, ::AbstractReachability) = nothing FiniteTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} Finite time reachability specified by a set of target/terminal states and a time horizon. -That is, denote a trace by ``s_1 s_2 s_3 \\cdots``, then if ``T`` is the set of target states and ``H`` is the time horizon, +That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states and ``K`` is the time horizon, the property is ```math - \\mathbb{P}(\\exists k = \\{0, \\ldots, H\\}, s_k \\in T). + \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{reach}}(G, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\exists k \\in \\{0, \\ldots, K\\}, \\, \\omega[k] \\in G \\right]. ``` """ struct FiniteTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} <: @@ -275,7 +275,7 @@ end """ InfiniteTimeReachability{R <: Real, VT <: Vector{<:CartesianIndex}} -`InfiniteTimeReachability` is similar to [`FiniteTimeReachability`](@ref) except that the time horizon is infinite, i.e., ``H = \\infty``. +`InfiniteTimeReachability` is similar to [`FiniteTimeReachability`](@ref) except that the time horizon is infinite, i.e., ``K = \\infty``. 
In practice it means, performing the value iteration until the value function has converged, defined by some threshold `convergence_eps`. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. """ @@ -325,10 +325,10 @@ end ExactTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} Exact time reachability specified by a set of target/terminal states and a time horizon. -That is, denote a trace by ``s_1 s_2 s_3 \\cdots``, then if ``T`` is the set of target states and ``H`` is the time horizon, +That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states and ``K`` is the time horizon, the property is ```math - \\mathbb{P}(s_H \\in T). + \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{exact-reach}}(G, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\omega[K] \\in G \\right]. ``` """ struct ExactTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} <: @@ -427,10 +427,10 @@ end FiniteTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}}, T <: Integer} Finite time reach-avoid specified by a set of target/terminal states, a set of avoid states, and a time horizon. -That is, denote a trace by ``s_1 s_2 s_3 \\cdots``, then if ``T`` is the set of target states, ``A`` is the set of states to avoid, -and ``H`` is the time horizon, the property is +That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states, ``O`` is the set of states to avoid, +and ``K`` is the time horizon, the property is ```math - \\mathbb{P}(\\exists k = \\{0, \\ldots, H\\}, s_k \\in T, \\text{ and } \\forall k' = \\{0, \\ldots, k\\}, s_k' \\notin A). + \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{reach-avoid}}(G, O, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\exists k \\in \\{0, \\ldots, K\\}, \\, \\omega[k] \\in G, \\; \\forall k' \\in \\{0, \\ldots, k' \\}, \\, \\omega[k] \\notin O \\right]. 
``` """ struct FiniteTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}, T <: Integer} <: @@ -494,7 +494,7 @@ end """ InfiniteTimeReachAvoid{R <: Real, VT <: AbstractVector{<:CartesianIndex}} -`InfiniteTimeReachAvoid` is similar to [`FiniteTimeReachAvoid`](@ref) except that the time horizon is infinite, i.e., ``H = \\infty``. +`InfiniteTimeReachAvoid` is similar to [`FiniteTimeReachAvoid`](@ref) except that the time horizon is infinite, i.e., ``K = \\infty``. """ struct InfiniteTimeReachAvoid{R <: Real, VT <: AbstractVector{<:CartesianIndex}} <: AbstractReachAvoid @@ -558,10 +558,10 @@ end ExactTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}}, T <: Integer} Exact time reach-avoid specified by a set of target/terminal states, a set of avoid states, and a time horizon. -That is, denote a trace by ``s_1 s_2 s_3 \\cdots``, then if ``T`` is the set of target states, ``A`` is the set of states to avoid, -and ``H`` is the time horizon, the property is +That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states, ``O`` is the set of states to avoid, +and ``K`` is the time horizon, the property is ```math - \\mathbb{P}(s_H \\in T, \\text{ and } \\forall k = \\{0, \\ldots, H\\}, s_k \\notin A). + \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{exact-reach-avoid}}(G, O, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\omega[K] \\in G, \\; \\forall k \\in \\{0, \\ldots, K\\}, \\, \\omega[k] \\notin O \\right]. ``` """ struct ExactTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}, T <: Integer} <: @@ -651,10 +651,10 @@ end FiniteTimeSafety{VT <: Vector{<:CartesianIndex}, T <: Integer} Finite time safety specified by a set of avoid states and a time horizon. 
-That is, denote a trace by ``s_1 s_2 s_3 \\cdots``, then if ``A`` is the set of avoid states and ``H`` is the time horizon, +That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``O`` is the set of avoid states and ``K`` is the time horizon, the property is ```math - \\mathbb{P}(\\forall k = \\{0, \\ldots, H\\}, s_k \\notin A). + \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{safe}}(O, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\forall k \\in \\{0, \\ldots, K\\}, \\, \\omega[k] \\notin O \\right]. ``` """ struct FiniteTimeSafety{VT <: Vector{<:CartesianIndex}, T <: Integer} <: AbstractSafety @@ -701,7 +701,7 @@ end """ InfiniteTimeSafety{R <: Real, VT <: Vector{<:CartesianIndex}} -`InfiniteTimeSafety` is similar to [`FiniteTimeSafety`](@ref) except that the time horizon is infinite, i.e., ``H = \\infty``. +`InfiniteTimeSafety` is similar to [`FiniteTimeSafety`](@ref) except that the time horizon is infinite, i.e., ``K = \\infty``. In practice it means, performing the value iteration until the value function has converged, defined by some threshold `convergence_eps`. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. """ @@ -785,11 +785,10 @@ end """ FiniteTimeReward{R <: Real, AR <: AbstractArray{R}, T <: Integer} -`FiniteTimeReward` is a property of rewards ``R : S \\to \\mathbb{R}`` assigned to each state at each iteration -and a discount factor ``\\gamma``. The time horizon ``H`` is finite, so the discount factor is optional and -the optimal policy will be time-varying. Given a strategy ``\\pi : S \\to A``, the property is +`FiniteTimeReward` is a property of rewards ``r : S \\to \\mathbb{R}`` assigned to each state at each iteration +and a discount factor ``\\nu``. The time horizon ``K`` is finite, so the discount factor can be greater than or equal to one. The property is ```math - V(s_0) = \\mathbb{E}\\left[\\sum_{k=0}^{H} \\gamma^k R(s_k) \\mid s_0, \\pi\\right]. 
+ \\mathbb{E}^{\\pi,\\eta}_{\\mathrm{reward}}(r, \\nu, K) = \\mathbb{E}^{\\pi,\\eta}\\left[\\sum_{k=0}^{K} \\nu^k r(\\omega[k]) \\right]. ``` """ struct FiniteTimeReward{R <: Real, AR <: AbstractArray{R}, T <: Integer} <: @@ -842,8 +841,7 @@ end InfiniteTimeReward{R <: Real, AR <: AbstractArray{R}} `InfiniteTimeReward` is a property of rewards assigned to each state at each iteration -and a discount factor for guaranteed convergence. The time horizon is infinite, i.e. ``H = \\infty``, so the optimal -policy will be stationary. +and a discount factor for guaranteed convergence. The time horizon is infinite, i.e. ``K = \\infty``. """ struct InfiniteTimeReward{R <: Real, AR <: AbstractArray{R}} <: AbstractReward{R} reward::AR @@ -917,15 +915,13 @@ postprocess_value_function!(value_function, ::AbstractHittingTime) = value_funct `ExpectedExitTime` is a property of hitting time with respect to an unsafe set. An equivalent characterization is that of the expected number of steps in the safe set until reaching the unsafe set. -The time horizon is infinite, i.e., ``H = \\infty``, thus the package performs value iteration until the value function +The time horizon is infinite, i.e., ``K = \\infty``, thus the package performs value iteration until the value function has converged. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. -As this is an infinite horizon property, the resulting optimal policy will be stationary. -In formal language, given a strategy ``\\pi : S \\to A`` and an unsafe set ``O``, the property is defined as +Given an unsafe set ``O``, the property is defined as ```math - V(s_0) = \\mathbb{E}\\left[\\lvert \\omega_{0:k-1} \\rvert \\mid s_0, \\pi, \\omega_{0:k-1} \\notin O, \\omega_k \\in O \\right] + \\mathbb{E}^{\\pi,\\eta}_{\\mathrm{exit}}(O) = \\mathbb{E}^{\\pi,\\eta}\\left[k : \\omega[k] \\in O, \\, \\forall k' \\in \\{0, \\ldots, k\\}, \\, \\omega[k'] \\notin O \\right]. 
``` -where ``\\omega = s_0 s_1 \\ldots s_k`` is the trajectory of the system, ``\\omega_{0:k-1} = s_0 s_1 \\ldots s_{k-1}`` denotes the subtrajectory -excluding the final state, and ``\\omega_k = s_k``. +where ``\\omega = s_0 s_1 \\ldots s_k`` is a trace of the system. """ struct ExpectedExitTime{R <: Real, VT <: Vector{<:CartesianIndex}} <: AbstractHittingTime avoid_states::VT From 252942b57bc1f95bbcd22344d15be91ff64e668d Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Tue, 30 Sep 2025 13:54:12 +0200 Subject: [PATCH 54/71] Fix maxsupportsize --- ext/cuda/array.jl | 6 - ext/cuda/bellman/sparse.jl | 160 ++++++++++++++++++--- ext/cuda/probabilities.jl | 8 +- src/cuda.jl | 11 +- src/probabilities/IntervalAmbiguitySets.jl | 3 +- src/utils.jl | 3 + test/cuda/sparse/bellman.jl | 44 +++++- test/cuda/sparse/imdp.jl | 2 +- 8 files changed, 195 insertions(+), 42 deletions(-) diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index 4b36a2a5..d2357f52 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -81,9 +81,3 @@ Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{Tv2}) Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{NTuple{N, T}}) where {Tv1, N, T <: Integer} = adapt(Array{NTuple{N, T}}, x) - -const CuSparseDeviceColumnView{Tv, Ti} = SubArray{Tv, 1, <:CuSparseDeviceMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} -IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = rowvals(p.gap) -IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = nnz(p.gap) - -IntervalMDP.maxsupportsize(p::IntervalMDP.IntervalAmbiguitySets{R, <:CuSparseMatrixCSC{R}}) where {R} = maxdiff(SparseArrays.getcolptr(p.gap)) diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index 7c7089d5..4aa45629 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -38,7 +38,7 @@ function 
IntervalMDP._bellman_helper!( return Vres end - # Try if we can fit all values and permutation indices into shared memory (25% less memory relative to (Tv, Tv)) + # Try if we can fit all values and permutation indices into shared memory (25% less memory relative to (Float64, Float64)) if try_large_sparse_bellman!( Tv, Int32, @@ -53,7 +53,22 @@ function IntervalMDP._bellman_helper!( return Vres end - # Try if we can fit permutation indices into shared memory (50% less memory relative to (Tv, Tv)) + # Try if we can fit two permutation indices into shared memory (50% less memory relative to (Float64, Float64)) + if try_large_sparse_bellman!( + Int32, + Int32, + workspace, + strategy_cache, + Vres, + V, + model; + upper_bound = upper_bound, + maximize = maximize, + ) + return Vres + end + + # Try if we can fit permutation indices into shared memory (75% less memory relative to (Float64, Float64)) if try_large_sparse_bellman!( Int32, Nothing, @@ -68,7 +83,7 @@ function IntervalMDP._bellman_helper!( return Vres end - throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * sizeof(Int32))) + throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * sizeof(Int32), CUDA.limit(CUDA.LIMIT_SHMEM_SIZE))) end function try_small_sparse_bellman!( @@ -336,18 +351,14 @@ end lane ) assume(warpsize() == 32) - warp_aligned_length = kernel_nextwarp(IntervalMDP.supportsize(ambiguity_set)) - support = IntervalMDP.support(ambiguity_set) # Copy into shared memory + gap_nonzeros = nonzeros(gap(ambiguity_set)) s = lane - @inbounds while s <= warp_aligned_length - if s <= IntervalMDP.supportsize(ambiguity_set) - idx = support[s] - value[s] = V[idx] - prob[s] = gap(ambiguity_set, idx) - end + @inbounds while s <= IntervalMDP.supportsize(ambiguity_set) + value[s] = V[support[s]] + prob[s] = gap_nonzeros[s] s += warpsize() end @@ -368,13 +379,13 @@ end support = IntervalMDP.support(ambiguity_set) # Add the lower bound multiplied by the value + lower_nonzeros = nonzeros(lower(ambiguity_set)) 
s = lane @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value if s <= IntervalMDP.supportsize(ambiguity_set) - idx = support[s] - l = lower(ambiguity_set, idx) - lower_value += l * V[idx] + l = lower_nonzeros[s] + lower_value += l * V[support[s]] used += l end s += warpsize() @@ -455,6 +466,10 @@ function try_large_sparse_bellman!( shmem = workspace.max_support * (sizeof(T1) + sizeof(T2)) + n_actions * sizeof(Tv) + if shmem > CUDA.limit(CUDA.LIMIT_SHMEM_SIZE) # Early exit if we cannot fit into shared memory + return false + end + kernel = @cuda launch = false large_sparse_bellman_kernel!( T1, T2, @@ -468,7 +483,7 @@ function try_large_sparse_bellman!( ) config = launch_configuration(kernel.fun; shmem = shmem) - max_threads = prevwarp(device(), config.threads) # 1600008 bytes + max_threads = prevwarp(device(), config.threads) if max_threads < 32 return false @@ -723,12 +738,12 @@ end support = IntervalMDP.support(ambiguity_set) # Add the lower bound multiplied by the value + lower_nonzeros = nonzeros(lower(ambiguity_set)) s = threadIdx().x @inbounds while s <= supportsize # Find index of the permutation, and lookup the corresponding lower bound and multipy by the value - idx = support[s] - l = lower(ambiguity_set, idx) - lower_value += l * V[idx] + l = lower_nonzeros[s] + lower_value += l * V[support[s]] used += l s += blockDim().x end @@ -757,11 +772,12 @@ end supportsize = IntervalMDP.supportsize(ambiguity_set) # Copy into shared memory + gap_nonzeros = nonzeros(gap(ambiguity_set)) s = threadIdx().x @inbounds while s <= supportsize idx = support[s] value[s] = V[idx] - prob[s] = gap(ambiguity_set, idx) + prob[s] = gap_nonzeros[s] s += blockDim().x end @@ -854,9 +870,8 @@ end # Copy into shared memory s = threadIdx().x @inbounds while s <= supportsize - idx = support[s] - value[s] = V[idx] - perm[s] = idx + value[s] = V[support[s]] + perm[s] = s s += blockDim().x end @@ -878,11 
+893,12 @@ end gap_value = zero(Tv) # Block-strided loop and save into register `gap_value` + gap_nonzeros = nonzeros(gap(ambiguity_set)) s = threadIdx().x @inbounds while s <= warp_aligned_length # Find index of the permutation, and lookup the corresponding gap g = if s <= length(value) - gap(ambiguity_set, perm[s]) + gap_nonzeros[perm[s]] else # 0 gap is a neural element zero(Tv) @@ -924,6 +940,104 @@ end return gap_value end +@inline function state_action_sparse_omaximization!( + Vperm::AbstractVector{Int32}, + Pperm::AbstractVector{Int32}, + V, + ambiguity_set, + value_lt +) + ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) + + Vperm = @view Vperm[1:IntervalMDP.supportsize(ambiguity_set)] + Pperm = @view Pperm[1:IntervalMDP.supportsize(ambiguity_set)] + block_bitonic_sortperm!(V, Vperm, Pperm, value_lt) + + value, remaining = add_lower_mul_V_block(V, ambiguity_set) + value += ii_add_gap_mul_V_sparse(V, Vperm, Pperm, ambiguity_set, remaining) + + return value +end + +@inline function ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) + support = IntervalMDP.support(ambiguity_set) + supportsize = IntervalMDP.supportsize(ambiguity_set) + + # Copy into shared memory + i = threadIdx().x + @inbounds while i <= supportsize + Vperm[i] = support[i] + Pperm[i] = i + i += blockDim().x + end + + # Need to synchronize to make sure all agree on the shared memory + sync_threads() +end + +@inline function ii_add_gap_mul_V_sparse( + value, + Vperm, + Pperm, + ambiguity_set, + remaining::Tv, +) where {Tv} + assume(warpsize() == 32) + wid, lane = fldmod1(threadIdx().x, warpsize()) + reduction_ws = CuStaticSharedArray(Tv, 32) + + warp_aligned_length = kernel_nextwarp(length(Vperm)) + gap_value = zero(Tv) + + # Block-strided loop and save into register `gap_value` + gap_nonzeros = nonzeros(gap(ambiguity_set)) + s = threadIdx().x + @inbounds while s <= warp_aligned_length + # Find index of the permutation, and lookup the corresponding 
gap + g = if s <= length(Vperm) + gap_nonzeros[Pperm[s]] + else + # 0 gap is a neural element + zero(Tv) + end + + # Cummulatively sum the gap with a tree reduction + cum_gap = cumsum_block(g, reduction_ws, wid, lane) + + # Update the remaining probability + remaining -= cum_gap + remaining += g + + # Update the probability + if s <= length(Vperm) + sub = clamp(remaining, zero(Tv), g) + gap_value += sub * value[Vperm[s]] + remaining -= sub + end + + # Update the remaining probability from the last thread in the block + if threadIdx().x == blockDim().x + reduction_ws[1] = remaining + end + sync_threads() + + remaining = reduction_ws[1] + sync_threads() + + # Early exit if the remaining probability is zero + if remaining <= zero(Tv) + break + end + + s += blockDim().x + end + + gap_value = CUDA.reduce_block(+, gap_value, zero(Tv), Val(true)) + + return gap_value +end + + @inline function state_action_sparse_omaximization!( perm::AbstractVector{Int32}, ::Nothing, diff --git a/ext/cuda/probabilities.jl b/ext/cuda/probabilities.jl index bcd2fc84..f9436105 100644 --- a/ext/cuda/probabilities.jl +++ b/ext/cuda/probabilities.jl @@ -11,8 +11,8 @@ function IntervalMDP.compute_gap( return adapt(IntervalMDP.CuModelAdaptor{Tv}, lower), adapt(IntervalMDP.CuModelAdaptor{Tv}, gap) end -IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuSparseDeviceMatrixCSC}}) where {R} = rowvals(p.gap) -IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuSparseDeviceMatrixCSC}}) where {R} = nnz(p.gap) +const CuSparseDeviceColumnView{Tv, Ti} = SubArray{Tv, 1, <:CuSparseDeviceMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} +IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = rowvals(p.gap) +IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = nnz(p.gap) -IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, 
<:SubArray{R, 1, <:CuDeviceMatrix}}) where {R} = eachindex(p.gap) -IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:SubArray{R, 1, <:CuDeviceMatrix}}) where {R} = length(p.gap) +IntervalMDP.maxsupportsize(p::IntervalMDP.IntervalAmbiguitySets{R, <:CuSparseMatrixCSC{R}}) where {R} = maxdiff(SparseArrays.getcolptr(p.gap)) diff --git a/src/cuda.jl b/src/cuda.jl index 2ad42f4a..781361f2 100644 --- a/src/cuda.jl +++ b/src/cuda.jl @@ -11,19 +11,22 @@ function cu end function cpu end struct OutOfSharedMemory <: Exception - min_shared_memory::Int + required::Int + available::Int end function Base.showerror(io::IO, e::OutOfSharedMemory) println( io, "Out of shared memory: minimum required shared memory for the problem is ", - e.min_shared_memory, - " bytes.", + e.required, + " bytes (", + e.available, + " bytes available on the device).", ) println( io, - "Please try either the CPU implementation, the (dense) decomposed representation (preferred), or use a larger GPU.", + "Please try either the CPU implementation, the decomposed representation, or use a larger GPU.", ) end diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index e1fe526b..662182c6 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -198,7 +198,8 @@ source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) action_shape(::IntervalAmbiguitySets) = (1,) marginals(p::IntervalAmbiguitySets) = (p,) -maxsupportsize(p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix{R}} = maximum(supportsize, p) +maxsupportsize(p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix{R}} = size(p.gap, 1) +maxsupportsize(p::IntervalAmbiguitySets{R, MR}) where {R, MR <: SparseArrays.AbstractSparseMatrixCSC{R}} = maxdiff(SparseArrays.getcolptr(p.gap)) function Base.getindex(p::IntervalAmbiguitySets, j::Integer) # Select by columns only! 
diff --git a/src/utils.jl b/src/utils.jl index baa91405..9c680953 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,3 +1,6 @@ +@inline @inbounds maxdiff(x::V) where {V <: AbstractVector} = + maximum(x[i + 1] - x[i] for i in 1:(length(x) - 1)) + arrayfactory(mp::ProductProcess, T, sizes) = arrayfactory(markov_process(mp), T, sizes) arrayfactory(mp::FactoredRMDP, T, sizes) = diff --git a/test/cuda/sparse/bellman.jl b/test/cuda/sparse/bellman.jl index cc4d0abf..b60db4b9 100644 --- a/test/cuda/sparse/bellman.jl +++ b/test/cuda/sparse/bellman.jl @@ -170,7 +170,45 @@ end n = 100000 m = 10 - nnz_per_column = 4000 # It has to be greater than 3800 to exceed shared memory for ff implementation + nnz_per_column = 4000 # It has to be greater than 3100 to exceed shared memory for ff implementation + prob, V, cuda_prob, cuda_V = + sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) + + ws = IntervalMDP.construct_workspace(prob) + strategy_cache = IntervalMDP.construct_strategy_cache(prob) + V_cpu = zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_cpu, + V, + prob; + upper_bound = false, + ) + + ws = IntervalMDP.construct_workspace(cuda_prob) + strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) + V_gpu = CUDA.zeros(Float64, m) + IntervalMDP._bellman_helper!( + ws, + strategy_cache, + V_gpu, + cuda_V, + cuda_prob; + upper_bound = false, + ) + V_gpu = IntervalMDP.cpu(V_gpu) # Convert to CPU for testing + + @test V_cpu ≈ V_gpu + end + + # Even more non-zeros + @testset "even more non-zeros" begin + rng = MersenneTwister(55392) + + n = 100000 + m = 10 + nnz_per_column = 6000 # It has to be greater than 4100 to exceed shared memory for fi implementation prob, V, cuda_prob, cuda_V = sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) @@ -208,7 +246,7 @@ end n = 100000 m = 10 - nnz_per_column = 6000 # It has to be greater than 5800 to exceed shared memory for fi implementation + nnz_per_column = 8000 # It has to be 
greater than 6144 to exceed shared memory for ii implementation prob, V, cuda_prob, cuda_V = sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) @@ -246,7 +284,7 @@ end n = 100000 m = 10 - nnz_per_column = 16000 # It has to be greater than 15600 to exceed shared memory for i implementation + nnz_per_column = 16000 # It has to be greater than 12300 to exceed shared memory for i implementation prob, V, cuda_prob, cuda_V = sample_sparse_interval_ambiguity_sets(rng, n, m, nnz_per_column) diff --git a/test/cuda/sparse/imdp.jl b/test/cuda/sparse/imdp.jl index c9b6e1e0..8d4f9d4a 100644 --- a/test/cuda/sparse/imdp.jl +++ b/test/cuda/sparse/imdp.jl @@ -1,5 +1,5 @@ using Revise, Test -using IntervalMDP, CUDA +using IntervalMDP, CUDA, SparseArrays @testset for N in [Float32, Float64] prob1 = IntervalAmbiguitySets(; From e04c70ff89f16556a7995c75e0a7269d63a039b0 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Tue, 30 Sep 2025 18:23:16 +0200 Subject: [PATCH 55/71] Make type parameters consistent for specifications --- docs/src/specifications.md | 2 +- src/specification.jl | 40 +++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/src/specifications.md b/docs/src/specifications.md index d6398cdf..63863d01 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -235,7 +235,7 @@ prop = InfiniteTimeReward(rewards, discount_factor, convergence_threshold) ### Expected exit time Given a avoid set ``O \subset S``, the expected exit time of the set `S \setminus O` is the following objective ```math -\mathbb{E}^{\pi,\eta}_{\mathrm{exit}}(O) = \mathbb{E}^{\pi,\eta}\left[k : \omega[k] \in O, \, \forall k' \in \{0, \ldots, k\}, \, \omega[k'] \notin O \right]. +\mathbb{E}^{\pi,\eta}_{\mathrm{exit}}(O) = \mathbb{E}^{\pi,\eta}\left[k : \omega[k] \in O, \, \forall k' \in \{0, \ldots, k - 1\}, \, \omega[k'] \notin O \right]. 
``` The property is equivalent to the following value function diff --git a/src/specification.jl b/src/specification.jl index a81caf27..6a0c6d04 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -100,7 +100,7 @@ end postprocess_value_function!(value_function, ::AbstractDFAReachability) = nothing """ - FiniteTimeDFAReachability{VT <: Vector{<:Int32}, T <: Integer} + FiniteTimeDFAReachability{VT <: Vector{<:Integer}, T <: Integer} Finite time reachability specified by a set of target/terminal states and a time horizon. That is, denote a trace by ``z_1 z_2 z_3 \\cdots`` with ``z_k = (s_k, q_k)`` then if ``T`` is the set of target states and ``H`` is the time horizon, @@ -153,13 +153,13 @@ function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeDFAReachabil end """ - InfiniteTimeDFAReachability{R <: Real, VT <: Vector{<:Int32}} + InfiniteTimeDFAReachability{VT <: Vector{<:Integer}, R <: Real} `InfiniteTimeDFAReachability` is similar to [`FiniteTimeDFAReachability`](@ref) except that the time horizon is infinite, i.e., ``H = \\infty``. In practice it means, performing the value iteration until the value function has converged, defined by some threshold `convergence_eps`. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. """ -struct InfiniteTimeDFAReachability{R <: Real, VT <: Vector{<:Int32}} <: +struct InfiniteTimeDFAReachability{VT <: Vector{<:Int32}, R <: Real} <: AbstractDFAReachability reach::VT convergence_eps::R @@ -221,7 +221,7 @@ end postprocess_value_function!(value_function, ::AbstractReachability) = nothing """ - FiniteTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} + FiniteTimeReachability{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, T <: Integer} Finite time reachability specified by a set of target/terminal states and a time horizon. 
That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states and ``K`` is the time horizon, @@ -273,13 +273,13 @@ function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeReachability end """ - InfiniteTimeReachability{R <: Real, VT <: Vector{<:CartesianIndex}} - + InfiniteTimeReachability{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, R <: Real} + `InfiniteTimeReachability` is similar to [`FiniteTimeReachability`](@ref) except that the time horizon is infinite, i.e., ``K = \\infty``. In practice it means, performing the value iteration until the value function has converged, defined by some threshold `convergence_eps`. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. """ -struct InfiniteTimeReachability{R <: Real, VT <: Vector{<:CartesianIndex}} <: +struct InfiniteTimeReachability{VT <: Vector{<:CartesianIndex}, R <: Real} <: AbstractReachability reach::VT convergence_eps::R @@ -322,7 +322,7 @@ function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReachabili end """ - ExactTimeReachability{VT <: Vector{<:CartesianIndex}, T <: Integer} + ExactTimeReachability{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, T <: Integer} Exact time reachability specified by a set of target/terminal states and a time horizon. That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states and ``K`` is the time horizon, @@ -424,7 +424,7 @@ function checkdisjoint(reach, avoid) end """ - FiniteTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}}, T <: Integer} + FiniteTimeReachAvoid{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}}, T <: Integer} Finite time reach-avoid specified by a set of target/terminal states, a set of avoid states, and a time horizon. 
That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states, ``O`` is the set of states to avoid, @@ -433,7 +433,7 @@ and ``K`` is the time horizon, the property is \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{reach-avoid}}(G, O, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\exists k \\in \\{0, \\ldots, K\\}, \\, \\omega[k] \\in G, \\; \\forall k' \\in \\{0, \\ldots, k' \\}, \\, \\omega[k] \\notin O \\right]. ``` """ -struct FiniteTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}, T <: Integer} <: +struct FiniteTimeReachAvoid{VT <: Vector{<:CartesianIndex}, T <: Integer} <: AbstractReachAvoid reach::VT avoid::VT @@ -492,11 +492,11 @@ function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeReachAvoid) end """ - InfiniteTimeReachAvoid{R <: Real, VT <: AbstractVector{<:CartesianIndex}} + InfiniteTimeReachAvoid{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, R <: Real} `InfiniteTimeReachAvoid` is similar to [`FiniteTimeReachAvoid`](@ref) except that the time horizon is infinite, i.e., ``K = \\infty``. """ -struct InfiniteTimeReachAvoid{R <: Real, VT <: AbstractVector{<:CartesianIndex}} <: +struct InfiniteTimeReachAvoid{VT <: Vector{<:CartesianIndex}, R <: Real} <: AbstractReachAvoid reach::VT avoid::VT @@ -555,7 +555,7 @@ function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReachAvoid end """ - ExactTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}}, T <: Integer} + ExactTimeReachAvoid{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}}, T <: Integer} Exact time reach-avoid specified by a set of target/terminal states, a set of avoid states, and a time horizon. 
That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``G`` is the set of target states, ``O`` is the set of states to avoid, @@ -564,7 +564,7 @@ and ``K`` is the time horizon, the property is \\mathbb{P}^{\\pi, \\eta}_{\\mathrm{exact-reach-avoid}}(G, O, K) = \\mathbb{P}^{\\pi, \\eta} \\left[\\omega \\in \\Omega : \\omega[K] \\in G, \\; \\forall k \\in \\{0, \\ldots, K\\}, \\, \\omega[k] \\notin O \\right]. ``` """ -struct ExactTimeReachAvoid{VT <: AbstractVector{<:CartesianIndex}, T <: Integer} <: +struct ExactTimeReachAvoid{VT <: Vector{<:CartesianIndex}, T <: Integer} <: AbstractReachAvoid reach::VT avoid::VT @@ -648,7 +648,7 @@ function postprocess_value_function!(value_function, ::AbstractSafety) end """ - FiniteTimeSafety{VT <: Vector{<:CartesianIndex}, T <: Integer} + FiniteTimeSafety{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, T <: Integer} Finite time safety specified by a set of avoid states and a time horizon. That is, denote a trace by ``\\omega = s_1 s_2 s_3 \\cdots``, then if ``O`` is the set of avoid states and ``K`` is the time horizon, @@ -699,13 +699,13 @@ function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeSafety) end """ - InfiniteTimeSafety{R <: Real, VT <: Vector{<:CartesianIndex}} + InfiniteTimeSafety{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, R <: Real} `InfiniteTimeSafety` is similar to [`FiniteTimeSafety`](@ref) except that the time horizon is infinite, i.e., ``K = \\infty``. In practice it means, performing the value iteration until the value function has converged, defined by some threshold `convergence_eps`. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. 
""" -struct InfiniteTimeSafety{R <: Real, VT <: Vector{<:CartesianIndex}} <: AbstractSafety +struct InfiniteTimeSafety{VT <: Vector{<:CartesianIndex}, R <: Real} <: AbstractSafety avoid::VT convergence_eps::R end @@ -911,7 +911,7 @@ abstract type AbstractHittingTime <: BasicProperty end postprocess_value_function!(value_function, ::AbstractHittingTime) = value_function """ - ExpectedExitTime{R <: Real, VT <: Vector{<:CartesianIndex}} + ExpectedExitTime{VT <: Vector{Union{<:Integer, <:Tuple, <:CartesianIndex}}, R <: Real} `ExpectedExitTime` is a property of hitting time with respect to an unsafe set. An equivalent characterization is that of the expected number of steps in the safe set until reaching the unsafe set. @@ -919,11 +919,11 @@ The time horizon is infinite, i.e., ``K = \\infty``, thus the package performs v has converged. The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. Given an unsafe set ``O``, the property is defined as ```math - \\mathbb{E}^{\\pi,\\eta}_{\\mathrm{exit}}(O) = \\mathbb{E}^{\\pi,\\eta}\\left[k : \\omega[k] \\in O, \\, \\forall k' \\in \\{0, \\ldots, k\\}, \\, \\omega[k'] \\notin O \\right]. + \\mathbb{E}^{\\pi,\\eta}_{\\mathrm{exit}}(O) = \\mathbb{E}^{\\pi,\\eta}\\left[k : \\omega[k] \\in O, \\, \\forall k' \\in \\{0, \\ldots, k - 1\\}, \\, \\omega[k'] \\notin O \\right]. ``` where ``\\omega = s_0 s_1 \\ldots s_k`` is a trace of the system. 
""" -struct ExpectedExitTime{R <: Real, VT <: Vector{<:CartesianIndex}} <: AbstractHittingTime +struct ExpectedExitTime{VT <: Vector{<:CartesianIndex}, R <: Real} <: AbstractHittingTime avoid_states::VT convergence_eps::R end From c65a4cf15c260885db10c70075e07a16184b2cf2 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 11:52:48 +0200 Subject: [PATCH 56/71] Update usage.md --- docs/src/models.md | 2 + docs/src/usage.md | 199 +++++++++++++++++++++++---------------------- 2 files changed, 105 insertions(+), 96 deletions(-) diff --git a/docs/src/models.md b/docs/src/models.md index 3ff112db..fdd15a11 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -82,6 +82,8 @@ mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, m !!! warn Notice that source-action pairs are on the columns of the matrices to defined the interval bounds. This is counter to most literature on transition matrices where transitions are from row to column. The choice of layout is to ensure that the memory access pattern is cache-friendly, as each column is stored contiguously in memory (column-major) and the Bellman updates iterate outer-most over source-action pairs. However, it also has a fundamental mathematical justification: the transition matrix can be viewed as a linear operator and the matrix form of a linear operator is defined such that the columns correspond to the input dimensions, i.e. from column to row. Furthermore, actions for the same source state are stored contiguously, which is also important for cache efficiency. +A general and useful subclass of fRMDPs is when each marginal ambiguity set is an interval ambiguity set. This subclass is called factored IMDPs (fIMDPs) and is described in more detail [below](@ref "fIMDPs"). 
+ ## IMCs Interval Markov Chains (IMCs) [delahaye2011decision](@cite) are a subclass of fRMDPs and a generalization of Markov Chains (MCs), where the transition probabilities are not known exactly, but they are constrained to be in some probability interval. Formally, an IMC ``M`` is a tuple ``M = (S, S_0, \Gamma)``, where diff --git a/docs/src/usage.md b/docs/src/usage.md index 37255399..fbf39888 100644 --- a/docs/src/usage.md +++ b/docs/src/usage.md @@ -1,34 +1,19 @@ # Usage -The general procedure for using this package can be described in 3 steps -1. Construct interval Markov process (IMC or IMDP) -2. Choose property (reachability, reach-avoid, safety, or reward + finite/infinite horizon) -3. Choose specification (optimistic/pessimistic, maximize/minimize + property) -3. Call `value_iteration` or `control_synthesis`. +The general procedure for using this package can be described in 5 steps +1. Construct a model, e.g. an Interval Markov Decision Process (IMDP) or some other subclass of [`FactoredRobustMarkovDecisionProcess`](@ref). +2. Choose property (reachability/reach-avoid/safety/reward + finite/infinite horizon). +3. Choose specification (optimistic/pessimistic + maximize/minimize + property). +4. Combine system and specification in a `VerificationProblem` or `ControlSynthesisProblem`, depending on whether you want to verify or synthesize a controller or not. +5. Call [`solve`](@ref) with the constructed problem and optionally a chosen algorithm. If no algorithm is given, a default algorithm will be chosen. -First, we construct a system. We can either construct an interval Markov chain (IMC) or an interval Markov decision process. (IMDP) -Both systems consist of states, a designated initial state, and a transition matrix. In addition, an IMDP has actions. -An example of how to construct either is the following: +First, we construct a system; for the purpose of this example, we will construct either an IMDP. 
For more information about the different models, see [Models](@ref). Note that all subclasses of [`FactoredRobustMarkovDecisionProcess`](@ref) are converted to an fRMDP internally for verification and control synthesis, and the default algorithm is inferred based on the structure of the fRMDP. +An fRMDP consist of state variables (each can take on a finite number of values), action variables (similar to state variables), designated initial states, and a transition model; more specifically, the product of ambiguity sets for each marginal. See [Factored RMDPs](@ref) for more information about transition model. -```julia -using IntervalMDP +An example of how to construct IMDP is the following: -# IMC -prob = IntervalAmbiguitySets(; - lower = [ - 0.0 0.5 0.0 - 0.1 0.3 0.0 - 0.2 0.1 1.0 - ], - upper = [ - 0.5 0.7 0.0 - 0.6 0.5 0.0 - 0.7 0.3 1.0 - ], -) - -initial_states = [1] # Initial states are optional -mc = IntervalMarkovChain(prob, initial_states) +```jldoctest usage +using IntervalMDP # IMDP prob1 = IntervalAmbiguitySets(; @@ -57,64 +42,59 @@ prob2 = IntervalAmbiguitySets(; ], ) -prob3 = IntervalProbabilities(; +prob3 = IntervalAmbiguitySets(; lower = [ 0.0 0.0 0.0 0.0 - 0.1 0.1 + 1.0 1.0 ], upper = [ 0.0 0.0 0.0 0.0 - 0.1 0.1 + 1.0 1.0 ], ) transition_probs = [prob1, prob2, prob3] initial_states = [1] # Initial states are optional -imdp = IntervalMarkovDecisionProcess(transition_probs, initial_states) +mdp = IntervalMarkovDecisionProcess(transition_probs, initial_states) + +# output + +FactoredRobustMarkovDecisionProcess +├─ 1 state variables with cardinality: (3,) +├─ 1 action variables with cardinality: (2,) +├─ Initial states: [1] +├─ Transition marginals: +│ └─ Marginal 1: +│ ├─ Conditional variables: states = (1,), actions = (1,) +│ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +└─Inferred properties + ├─Model type: Interval MDP + ├─Number of states: 3 + ├─Number of actions: 2 + ├─Default model checking algorithm: Robust Value Iteration + 
└─Default Bellman operator algorithm: O-Maximization ``` Note that for an IMDP, the transition probabilities are specified as a list of transition probabilities (with each column representing an action) for each state. The constructor will concatenate the transition probabilities into a single matrix, such that the columns represent source/action pairs and the rows represent target states. -It will in addition construct a state pointer `stateptr` pointing to the first column of each state. -See [`IntervalMarkovDecisionProcess`](@ref) for more details on how to construct an IMDP. -For IMC, the transition probability structure is significantly simpler with source states on the columns and target states on the rows of the transition matrices. Internally, they are both represented by an `IntervalMarkovDecisionProcess`. +!!! tip + IMDPs can be very memory intensive if the ambiguity sets are stored as dense matrices. To reduce memory usage, consider using [Sparse matrices](@ref) and/or [Factored RMDPs](@ref) (recommended). -Next, we choose a property. Currently supported are reachability, reach-avoid, safety, and reward properties. -For reachability, we specify a target set of states and for reach-avoid we specify a target set of states and an avoid set of states. -For a safety property, we specify a set of states that must be avoided, and for a reward property, we specify a reward matrix and a discount factor. -Furthermore, this package distinguishes distinguish between finite and infinite horizon properties - for finite horizon, a time horizon must be given while for infinite horizon, a convergence threshold must be given. In addition to the property, we need to specify whether we want to maximize or minimize the optimistic or pessimistic satisfaction probability or discounted reward. +Next, we choose a property. Currently supported are reachability, reach-avoid, safety, reward, expected exit time and DFA-based properties. 
+For this example, we will use a reachability property, which requires specifying a set of target states `target_set`. +Furthermore, this package distinguishes distinguish between finite and infinite horizon properties - for finite horizon, a time horizon must be given, while for infinite horizon, a convergence threshold must be provided. -```julia -## Properties +In addition to the property, we need to specify whether we want to maximize or minimize the optimistic or pessimistic value (the value being satisfaction probability, discounted reward, etc.). We call this a specification. + +```jldoctest usage # Reachability target_set = [3] - prop = FiniteTimeReachability(target_set, 10) # Time steps prop = InfiniteTimeReachability(target_set, 1e-6) # Residual tolerance -# Reach-avoid -target_set = [3] -avoid_set = [2] - -prop = FiniteTimeReachAvoid(target_set, avoid_set, 10) # Time steps -prop = InfiniteTimeReachAvoid(target_set, avoid_set, 1e-6) # Residual tolerance - -# Safety -avoid_set = [2] - -prop = FiniteTimeSafety(avoid_set, 10) # Time steps -prop = InfiniteTimeSafety(avoid_set, 1e-6) # Residual tolerance - -# Reward -reward = [1.0, 2.0, 3.0] -discount = 0.9 # Has to be between 0 and 1 - -prop = FiniteTimeReward(reward, discount, 10) # Time steps -prop = InfiniteTimeReward(reward, discount, 1e-6) # Residual tolerance - ## Specification spec = Specification(prop, Pessimistic, Maximize) spec = Specification(prop, Pessimistic, Minimize) @@ -122,35 +102,61 @@ spec = Specification(prop, Optimistic, Maximize) spec = Specification(prop, Optimistic, Minimize) ## Combine system and specification in a Problem -problem = VerificationProblem(imdp_or_imc, spec) +verification_problem = VerificationProblem(mdp, spec) # use `VerificationProblem(mdp, spec, strategy)` to verify under a given strategy +control_problem = ControlSynthesisProblem(mdp, spec) + +# output + +ControlSynthesisProblem +├─ FactoredRobustMarkovDecisionProcess +│ ├─ 1 state variables with cardinality: (3,) 
+│ ├─ 1 action variables with cardinality: (2,) +│ ├─ Initial states: [1] +│ ├─ Transition marginals: +│ │ └─ Marginal 1: +│ │ ├─ Conditional variables: states = (1,), actions = (1,) +│ │ └─ Ambiguity set type: Interval (dense, Matrix{Float64}) +│ └─Inferred properties +│ ├─Model type: Interval MDP +│ ├─Number of states: 3 +│ ├─Number of actions: 2 +│ ├─Default model checking algorithm: Robust Value Iteration +│ └─Default Bellman operator algorithm: O-Maximization +└─ Specification + ├─ Satisfaction mode: Optimistic + ├─ Strategy mode: Minimize + └─ Property: InfiniteTimeReachability + ├─ Convergence threshold: 1.0e-6 + └─ Reach states: CartesianIndex{1}[CartesianIndex(3,)] ``` +!!! tip + For complex properties, e.g. LTLf, it is necessary to construct a Definite Finite Automaton (DFA) and (lazily) build the product with the fRMDP. See [Complex properties](@ref) for more details on the product construction and DFA properties. Note that constructing the DFA from an LTLf formula is currently not supported by this package. + Finally, we call [`solve`](@ref) to solve the specification. `solve` returns the value function for all states in addition to the number of iterations performed and the last Bellman residual, wrapped in a solution object. -```julia -sol = solve(problem) # or solve(problem, RobustValueIteration()) +```jldoctest usage; output = false +sol = solve(verification_problem) # or solve(problem, alg) where e.g. 
alg = RobustValueIteration(LPMcCormickRelaxation()) to specify the algorithm V, k, res = sol # or alternatively V, k, res = value_function(sol), num_iterations(sol), residual(sol) + +# For control synthesis, we also get a strategy +sol = solve(control_problem) +V, k, res, strategy = sol + +# output + +IntervalMDP.ControlSynthesisSolution{StationaryStrategy{1, Vector{Tuple{Int32}}}, Float64, Vector{Float64}, Nothing}(StationaryStrategy{1, Vector{Tuple{Int32}}}(Tuple{Int32}[(2,), (1,), (1,)]), [0.19999999999999998, 0.4, 1.0], [-0.0, -0.0, -0.0], 2, nothing) ``` For now, only [`RobustValueIteration`](@ref) is supported, but more algorithms are planned. !!! note To use multi-threading for parallelization, you need to either start julia with `julia --threads ` where `n` is a positive integer or to set the environment variable `JULIA_NUM_THREADS` to the number of threads you want to use. For more information, see [Multi-threading](https://docs.julialang.org/en/v1/manual/multi-threading/). -!!! tip - For less memory usage, it is recommended to use [Sparse matrices](@ref) and/or [Orthogonal models](@ref). - ## Sparse matrices -A disadvantage of IMDPs is that the size of the transition matrices grows ``O(n^2 m)`` where ``n`` is the number of states and ``m`` is the number of actions. -Quickly, this becomes infeasible to store in memory. However, IMDPs frequently have lots of sparsity we may exploit. We choose in particular to -store the transition matrices in the [compressed sparse column (CSC)](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_(CSC_or_CCS)) format. -This is a format that is widely used in Julia and other languages, and is supported by many linear algebra operations. -It consists of three arrays: `colptr`, `rowval` and `nzval`. The `colptr` array stores the indices of the first non-zero value in each column. -The `rowval` array stores the row indices of the non-zero values, and the `nzval` array stores the non-zero values. 
-We choose this format, since source states are on the columns (see [`IntervalAmbiguitySets`](@ref) for more information about the structure of the transition probability matrices). -Thus the non-zero values for each source state is stored in sequentially in memory, enabling efficient memory access. +A disadvantage of IMDPs is that the size of the transition matrices grows ``O(n^2 m)`` where ``n`` is the number of states and ``m`` is the number of actions. Quickly, this becomes infeasible to store in memory. However, IMDPs frequently have lots of sparsity we may exploit. We choose in particular to store the transition matrices in the [Compressed Sparse Column (CSC)](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_(CSC_or_CCS)) format. This is a format that is widely used in Julia and other languages, and is supported by many linear algebra operations. The format consists of three arrays: `colptr`, `rowval` and `nzval`. The `colptr` array stores the indices of the first non-zero value in each column. The `rowval` array stores the row indices of the non-zero values, and the `nzval` array stores the non-zero values. We choose this format, since source states are stored as columns (see [`IntervalAmbiguitySets`](@ref) and [`Marginal`](@ref) for more information about the structure of the transition probability matrices). Thus the non-zero values for each source state is stored in sequentially in memory, enabling efficient memory access. To use `SparseMatrixCSC`, we need to load `SparseArrays`. Below is an example of how to construct an `IntervalMarkovChain` with sparse transition matrices. ```@example @@ -190,20 +196,10 @@ initial_state = 1 imc = IntervalMarkovChain(prob, initial_state) ``` -If you know that the matrix can be built sequentially, you can use the `SparseMatrixCSC` constructor directly with `colptr`, `rowval` and `nzval`. 
-This is more efficient, since `setindex!` of `SparseMatrixCSC` needs to perform a binary search to find the correct index to insert the value, -and possibly expand the size of the array. - -## Orthogonal models -TODO - -## Control synthesis -TODO +If you know that the matrix can be built sequentially, you can use the `SparseMatrixCSC` constructor directly with `colptr`, `rowval` and `nzval`. This is more efficient, since `setindex!` of `SparseMatrixCSC` needs to perform a binary search to find the correct index to insert the value, and possibly expand the size of the array. ## CUDA -Part of the innovation of this package is GPU-accelerated value iteration via CUDA. This includes not only -trivial parallelization across states but also parallel algorithms for O-maximization within each state -for better computational efficiency and coalesced memory access for more speed. +This package is supports GPU-accelerated value iteration via CUDA (only for [`IMDPs`](@ref) and [`IMCs`](@ref) at the moment). This includes not only trivial parallelization across states but also parallel algorithms for O-maximization within each state for better computational efficiency and coalesced memory access for more speed. To use CUDA, you need to first install `CUDA.jl`. For more information about this, see [Installation](@ref). Next, you need to load the package with the following command: @@ -211,16 +207,11 @@ Next, you need to load the package with the following command: using CUDA ``` -Loading CUDA will automatically load an extension that defines value iteration with CUDA arrays. -It has been separated out into an extension to reduce precompilation time for users that do not need CUDA. -Note that loading CUDA on a system without a CUDA-capable GPU, will not cause any errors, although a warning, upon loading, but only when running. -You can check if CUDA is correctly loaded using `CUDA.functional()`. 
+Loading CUDA will automatically load an extension that defines Bellman operators when the ambiguity sets are specified as CUDA arrays. It has been separated out into an extension to reduce precompilation time for users that do not need CUDA. Note that loading CUDA on a system without a CUDA-capable GPU, will not cause any errors, but only when running. You can check if CUDA is available using `CUDA.functional()`. -To use CUDA, you need to transfer the model to the GPU. Once on the GPU, you can use the same functions as the CPU implementation. -Using Julia's multiple dispatch, the package will automatically dispatch to the appropriate implementation of `bellman!`. +To use CUDA, you need to transfer the model to the GPU. Once on the GPU, you can use the same functions as the CPU implementation. Using Julia's multiple dispatch, the package will automatically dispatch to the appropriate implementation of the Bellman operators. -Similar to `CUDA.jl`, we provide a `cu` function that transfers the model to the GPU[^1]. You can either transfer the entire model -or transfer the transition matrices separately. +Similar to `CUDA.jl`, we provide a `cu` function that transfers the model to the GPU[^1]. You can either transfer the entire model or transfer the transition matrices separately. 
```julia # Transfer entire model to GPU prob = IntervalAmbiguitySets(; @@ -238,6 +229,22 @@ prob = IntervalAmbiguitySets(; mc = IntervalMDP.cu(IntervalMarkovChain(prob, 1)) +# Transfer ambiguity sets to GPU +prob = IntervalMDP.cu(IntervalAmbiguitySets(; + lower = sparse_hcat( + SparseVector(3, [2, 3], [0.1, 0.2]), + SparseVector(3, [1, 2, 3], [0.5, 0.3, 0.1]), + SparseVector(3, [3], [1.0]), + ), + upper = sparse_hcat( + SparseVector(3, [1, 2, 3], [0.5, 0.6, 0.7]), + SparseVector(3, [1, 2, 3], [0.7, 0.5, 0.3]), + SparseVector(3, [3], [1.0]), + ), +)) + +mc = IntervalMarkovChain(prob, [1]) + # Transfer transition matrices separately prob = IntervalAmbiguitySets(; lower = IntervalMDP.cu(sparse_hcat( @@ -252,7 +259,7 @@ prob = IntervalAmbiguitySets(; )), ) -mc = IntervalMarkovChain(prob,[1]) +mc = IntervalMarkovChain(prob, [1]) ``` -[^1]: The difference to `CUDA.jl`'s `cu` function is that `IntervalMDPs.jl`'s `cu` is opinoinated to `Float64` values and `Int32` indices, to reduce register pressure but maintain accuracy \ No newline at end of file +[^1]: The difference to `CUDA.jl`'s `cu` function is that `IntervalMDPs.jl`'s `cu` is opinionated to preserve value types and use `Int32` indices, to reduce register pressure but maintain accuracy \ No newline at end of file From 39cb48fdc52672193f0877e711acd9533742a0cc Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 16:37:15 +0200 Subject: [PATCH 57/71] Format --- docs/make.jl | 5 +- ext/IntervalMDPCudaExt.jl | 9 +- ext/cuda/array.jl | 30 +- ext/cuda/bellman/dense.jl | 49 +- ext/cuda/bellman/sparse.jl | 172 +- ext/cuda/probabilities.jl | 27 +- ext/cuda/strategy.jl | 9 +- ext/cuda/workspace.jl | 4 +- src/Data/bmdp-tool.jl | 12 +- src/Data/intervalmdp.jl | 58 +- src/Data/prism.jl | 72 +- src/algorithms.jl | 53 +- src/bellman.jl | 158 +- src/models/DFA.jl | 4 +- .../FactoredRobustMarkovDecisionProcess.jl | 106 +- src/models/IntervalMarkovChain.jl | 18 +- src/models/IntervalMarkovDecisionProcess.jl 
| 33 +- src/models/ProductProcess.jl | 15 +- src/probabilities/IntervalAmbiguitySets.jl | 161 +- src/probabilities/Marginal.jl | 68 +- src/probabilities/probabilities.jl | 8 +- src/problem.jl | 8 +- src/specification.jl | 51 +- src/strategy.jl | 2 +- src/strategy_cache.jl | 26 +- src/utils.jl | 11 +- src/workspace.jl | 90 +- test/base/bellman.jl | 54 +- test/base/factored.jl | 1259 +++++++------- test/base/imdp.jl | 57 +- test/base/synthesis.jl | 2 +- test/base/vi.jl | 2 +- test/cuda/dense/bellman.jl | 20 +- test/cuda/dense/imdp.jl | 45 +- test/cuda/dense/synthesis.jl | 3 +- test/cuda/dense/vi.jl | 2 +- test/cuda/sparse/imdp.jl | 26 +- test/cuda/sparse/synthesis.jl | 3 +- test/cuda/sparse/vi.jl | 2 +- test/data/bmdp_tool.jl | 2 +- test/sparse/bellman.jl | 55 +- test/sparse/factored.jl | 1541 +++++++++-------- test/sparse/imdp.jl | 27 +- test/sparse/sparse.jl | 8 +- test/sparse/synthesis.jl | 2 +- test/sparse/vi.jl | 14 +- 46 files changed, 2443 insertions(+), 1940 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 7efa12ee..cb75678a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,10 +4,7 @@ using Documenter, DocumenterCitations push!(LOAD_PATH, "../src/") DocMeta.setdocmeta!(IntervalMDP, :DocTestSetup, :(using IntervalMDP); recursive = true) -bib = CitationBibliography( - joinpath(@__DIR__, "src", "refs.bib"); - style=:numeric -) +bib = CitationBibliography(joinpath(@__DIR__, "src", "refs.bib"); style = :numeric) makedocs(; modules = [IntervalMDP, IntervalMDP.Data], diff --git a/ext/IntervalMDPCudaExt.jl b/ext/IntervalMDPCudaExt.jl index 0541b3cb..8ffc254e 100644 --- a/ext/IntervalMDPCudaExt.jl +++ b/ext/IntervalMDPCudaExt.jl @@ -14,7 +14,8 @@ IntervalMDP.cpu(obj) = adapt(IntervalMDP.CpuModelAdaptor{IntervalMDP.valuetype(o Adapt.@adapt_structure Marginal Adapt.@adapt_structure StationaryStrategy -Adapt.adapt_structure(to, strategy::TimeVaryingStrategy) = TimeVaryingStrategy([adapt(to, s) for s in strategy.strategy]) +Adapt.adapt_structure(to, 
strategy::TimeVaryingStrategy) = + TimeVaryingStrategy([adapt(to, s) for s in strategy.strategy]) function Adapt.adapt_structure( T::Type{<:IntervalMDP.CuModelAdaptor}, @@ -26,7 +27,7 @@ function Adapt.adapt_structure( IntervalMDP.source_shape(mdp), adapt(T, marginals(mdp)), adapt(CuArray{Int32}, initial_states(mdp)), - Val(false) # check = false + Val(false), # check = false ) end @@ -40,7 +41,7 @@ function Adapt.adapt_structure( IntervalMDP.source_shape(mdp), adapt(T, marginals(mdp)), adapt(Array{Int32}, initial_states(mdp)), - Val(false) # check = false + Val(false), # check = false ) end @@ -48,7 +49,7 @@ function Adapt.adapt_structure(to, as::IntervalAmbiguitySets) return IntervalAmbiguitySets( adapt(to, as.lower), adapt(to, as.gap), - Val(false) # check = false + Val(false), # check = false ) end diff --git a/ext/cuda/array.jl b/ext/cuda/array.jl index d2357f52..019b7763 100644 --- a/ext/cuda/array.jl +++ b/ext/cuda/array.jl @@ -62,22 +62,32 @@ Adapt.adapt_storage( M::SparseArrays.FixedSparseCSC, ) where {Tv} = CuSparseMatrixCSC{Tv, Int32}(M) -Adapt.adapt_storage(::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, M::SparseMatrixCSC{Tv2}) where {Tv1, Tv2} = - CuSparseMatrixCSC{Tv1, Int32}(M) +Adapt.adapt_storage( + ::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, + M::SparseMatrixCSC{Tv2}, +) where {Tv1, Tv2} = CuSparseMatrixCSC{Tv1, Int32}(M) -Adapt.adapt_storage(::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, x::AbstractArray{Tv2}) where {Tv1, Tv2} = - adapt(CuArray{Tv1}, x) +Adapt.adapt_storage( + ::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, + x::AbstractArray{Tv2}, +) where {Tv1, Tv2} = adapt(CuArray{Tv1}, x) -Adapt.adapt_storage(::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, x::AbstractArray{NTuple{N, T}}) where {Tv1, N, T <: Integer} = - adapt(CuArray{NTuple{N, T}}, x) +Adapt.adapt_storage( + ::Type{<:IntervalMDP.CuModelAdaptor{Tv1}}, + x::AbstractArray{NTuple{N, T}}, +) where {Tv1, N, T <: Integer} = adapt(CuArray{NTuple{N, T}}, x) Adapt.adapt_storage( 
::Type{IntervalMDP.CpuModelAdaptor{Tv}}, M::CuSparseMatrixCSC, ) where {Tv} = SparseMatrixCSC{Tv, Int32}(M) -Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{Tv2}) where {Tv1, Tv2} = - adapt(Array{Tv1}, x) +Adapt.adapt_storage( + ::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, + x::CuArray{Tv2}, +) where {Tv1, Tv2} = adapt(Array{Tv1}, x) -Adapt.adapt_storage(::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, x::CuArray{NTuple{N, T}}) where {Tv1, N, T <: Integer} = - adapt(Array{NTuple{N, T}}, x) +Adapt.adapt_storage( + ::Type{<:IntervalMDP.CpuModelAdaptor{Tv1}}, + x::CuArray{NTuple{N, T}}, +) where {Tv1, N, T <: Integer} = adapt(Array{NTuple{N, T}}, x) diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index bad7540e..0a250699 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -7,16 +7,23 @@ function IntervalMDP._bellman_helper!( upper_bound = false, maximize = true, ) where {Tv} - n_actions = isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? workspace.num_actions : 1 + n_actions = + isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? 
workspace.num_actions : 1 marginal = marginals(model)[1] n_states = source_shape(marginal)[1] if IntervalMDP.valuetype(marginal) != Tv - throw(ArgumentError("Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).")) + throw( + ArgumentError( + "Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).", + ), + ) end max_states_per_block = 32 # == num_warps - shmem = length(V) * (sizeof(Int32) + sizeof(Tv)) + max_states_per_block * n_actions * sizeof(Tv) + shmem = + length(V) * (sizeof(Int32) + sizeof(Tv)) + + max_states_per_block * n_actions * sizeof(Tv) kernel = @cuda launch = false dense_bellman_kernel!( workspace, @@ -41,7 +48,8 @@ function IntervalMDP._bellman_helper!( states_per_block = min(n_states, div(max_threads, threads_per_state)) threads = threads_per_state * states_per_block blocks = min(2^16 - 1, cld(n_states, states_per_block)) - shmem = length(V) * (sizeof(Int32) + sizeof(Tv)) + states_per_block * n_actions * sizeof(Tv) + shmem = + length(V) * (sizeof(Int32) + sizeof(Tv)) + states_per_block * n_actions * sizeof(Tv) kernel( workspace, @@ -97,12 +105,15 @@ end @inline function initialize_dense_action_workspace( workspace, ::OptimizingActiveCache, - marginal + marginal, ) assume(warpsize() == 32) nwarps = div(blockDim().x, warpsize()) wid = fld1(threadIdx().x, warpsize()) - action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, nwarps)) + action_workspace = CuDynamicSharedArray( + IntervalMDP.valuetype(marginal), + (workspace.num_actions, nwarps), + ) @inbounds action_workspace = @view action_workspace[:, wid] return action_workspace @@ -111,7 +122,7 @@ end @inline function initialize_dense_action_workspace( workspace, ::NonOptimizingActiveCache, - marginal + marginal, ) return nothing end @@ -120,13 +131,18 @@ end workspace, ::OptimizingActiveCache, V::AbstractVector{Tv}, - marginal + 
marginal, ) where {Tv} assume(warpsize() == 32) nwarps = div(blockDim().x, warpsize()) Tv2 = IntervalMDP.valuetype(marginal) - value = CuDynamicSharedArray(Tv, length(V), workspace.num_actions * nwarps * sizeof(Tv2)) - perm = CuDynamicSharedArray(Int32, length(V), workspace.num_actions * nwarps * sizeof(Tv2) + length(V) * sizeof(Tv)) + value = + CuDynamicSharedArray(Tv, length(V), workspace.num_actions * nwarps * sizeof(Tv2)) + perm = CuDynamicSharedArray( + Int32, + length(V), + workspace.num_actions * nwarps * sizeof(Tv2) + length(V) * sizeof(Tv), + ) return value, perm end @@ -134,7 +150,7 @@ end workspace, ::NonOptimizingActiveCache, V::AbstractVector{Tv}, - marginal + marginal, ) where {Tv} value = CuDynamicSharedArray(Tv, length(V)) perm = CuDynamicSharedArray(Int32, length(V), length(V) * sizeof(Tv)) @@ -219,7 +235,14 @@ end end # Find the best action - v = extract_strategy_warp!(strategy_cache, action_workspace, Vres, jₛ, action_reduce, lane) + v = extract_strategy_warp!( + strategy_cache, + action_workspace, + Vres, + jₛ, + action_reduce, + lane, + ) if lane == one(Int32) Vres[jₛ] = v @@ -322,4 +345,4 @@ end res_value = CUDA.reduce_warp(+, res_value) return res_value -end \ No newline at end of file +end diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index 4aa45629..1dbb5516 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -83,7 +83,12 @@ function IntervalMDP._bellman_helper!( return Vres end - throw(IntervalMDP.OutOfSharedMemory(workspace.max_support * sizeof(Int32), CUDA.limit(CUDA.LIMIT_SHMEM_SIZE))) + throw( + IntervalMDP.OutOfSharedMemory( + workspace.max_support * sizeof(Int32), + CUDA.limit(CUDA.LIMIT_SHMEM_SIZE), + ), + ) end function try_small_sparse_bellman!( @@ -101,12 +106,17 @@ function try_small_sparse_bellman!( # - use shared memory to store the values and gap probability # - use bitonic sort in a warp to sort values_gaps - n_actions = isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? 
workspace.num_actions : 1 + n_actions = + isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? workspace.num_actions : 1 marginal = marginals(model)[1] n_states = source_shape(marginal)[1] if IntervalMDP.valuetype(marginal) != Tv - throw(ArgumentError("Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).")) + throw( + ArgumentError( + "Value type of the model ($(IntervalMDP.valuetype(marginal))) does not match the value type of the input vector ($Tv).", + ), + ) end kernel = @cuda launch = false small_sparse_bellman_kernel!( @@ -163,8 +173,10 @@ function small_sparse_bellman_kernel!( ) where {Tv} assume(warpsize() == 32) - action_workspace = initialize_small_sparse_action_workspace(workspace, strategy_cache, marginal) - value_ws, gap_ws = initialize_small_sparse_value_and_gap(workspace, strategy_cache, V, marginal) + action_workspace = + initialize_small_sparse_action_workspace(workspace, strategy_cache, marginal) + value_ws, gap_ws = + initialize_small_sparse_value_and_gap(workspace, strategy_cache, V, marginal) nwarps = div(blockDim().x, warpsize()) wid = fld1(threadIdx().x, warpsize()) @@ -191,13 +203,16 @@ end @inline function initialize_small_sparse_action_workspace( workspace, ::OptimizingActiveCache, - marginal + marginal, ) assume(warpsize() == 32) nwarps = div(blockDim().x, warpsize()) wid = fld1(threadIdx().x, warpsize()) - action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), (workspace.num_actions, nwarps)) + action_workspace = CuDynamicSharedArray( + IntervalMDP.valuetype(marginal), + (workspace.num_actions, nwarps), + ) @inbounds action_workspace = @view action_workspace[:, wid] return action_workspace @@ -206,7 +221,7 @@ end @inline function initialize_small_sparse_action_workspace( workspace, ::NonOptimizingActiveCache, - marginal + marginal, ) return nothing end @@ -215,17 +230,26 @@ end workspace, ::OptimizingActiveCache, V::AbstractVector{Tv}, - marginal + 
marginal, ) where {Tv} assume(warpsize() == 32) nwarps = div(blockDim().x, warpsize()) wid = fld1(threadIdx().x, warpsize()) Tv2 = IntervalMDP.valuetype(marginal) - value_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps), workspace.num_actions * nwarps * sizeof(Tv2)) + value_ws = CuDynamicSharedArray( + Tv, + (workspace.max_support, nwarps), + workspace.num_actions * nwarps * sizeof(Tv2), + ) @inbounds value_ws = @view value_ws[:, wid] - gap_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps), workspace.num_actions * nwarps * sizeof(Tv2) + workspace.max_support * nwarps * sizeof(Tv)) + gap_ws = CuDynamicSharedArray( + Tv, + (workspace.max_support, nwarps), + workspace.num_actions * nwarps * sizeof(Tv2) + + workspace.max_support * nwarps * sizeof(Tv), + ) @inbounds gap_ws = @view gap_ws[:, wid] return value_ws, gap_ws @@ -235,7 +259,7 @@ end workspace, ::NonOptimizingActiveCache, V::AbstractVector{Tv}, - marginal + marginal, ) where {Tv} assume(warpsize() == 32) nwarps = div(blockDim().x, warpsize()) @@ -244,7 +268,11 @@ end value_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps)) @inbounds value_ws = @view value_ws[:, wid] - gap_ws = CuDynamicSharedArray(Tv, (workspace.max_support, nwarps), workspace.max_support * nwarps * sizeof(Tv)) + gap_ws = CuDynamicSharedArray( + Tv, + (workspace.max_support, nwarps), + workspace.max_support * nwarps * sizeof(Tv), + ) @inbounds gap_ws = @view gap_ws[:, wid] return value_ws, gap_ws end @@ -275,7 +303,7 @@ end V, ambiguity_set, value_lt, - lane + lane, ) if lane == one(Int32) @@ -287,7 +315,14 @@ end end # Find the best action - v = extract_strategy_warp!(strategy_cache, action_workspace, Vres, jₛ, action_reduce, lane) + v = extract_strategy_warp!( + strategy_cache, + action_workspace, + Vres, + jₛ, + action_reduce, + lane, + ) if lane == one(Int32) Vres[jₛ] = v @@ -314,7 +349,14 @@ end ambiguity_set = marginal[jₐ, (jₛ,)] # Use O-maxmization to find the value for the action - v = 
state_action_small_sparse_omaximization!(value_ws, gap_ws, V, ambiguity_set, value_lt, lane) + v = state_action_small_sparse_omaximization!( + value_ws, + gap_ws, + V, + ambiguity_set, + value_lt, + lane, + ) if lane == one(Int32) Vres[jₛ] = v @@ -329,7 +371,7 @@ end V, ambiguity_set, value_lt, - lane + lane, ) small_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value_ws, gap_ws, lane) @@ -348,7 +390,7 @@ end ambiguity_set, value, prob, - lane + lane, ) assume(warpsize() == 32) support = IntervalMDP.support(ambiguity_set) @@ -366,11 +408,7 @@ end sync_warp() end -@inline function add_lower_mul_V_warp( - V::AbstractVector{R}, - ambiguity_set, - lane, -) where {R} +@inline function add_lower_mul_V_warp(V::AbstractVector{R}, ambiguity_set, lane) where {R} assume(warpsize() == 32) warp_aligned_length = kernel_nextwarp(IntervalMDP.supportsize(ambiguity_set)) @@ -460,7 +498,8 @@ function try_large_sparse_bellman!( # - use shared memory to store the values/value_perm and gap probability/gap_perm # - use bitonic sort in a block to sort the values - n_actions = isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? workspace.num_actions : 1 + n_actions = + isa(strategy_cache, IntervalMDP.OptimizingStrategyCache) ? 
workspace.num_actions : 1 marginal = marginals(model)[1] n_states = source_shape(marginal)[1] @@ -490,7 +529,7 @@ function try_large_sparse_bellman!( end wanted_threads = nextwarp(device(), workspace.max_support) - threads = min(1024, max_threads, wanted_threads) + threads = min(1024, max_threads, wanted_threads) blocks = min(2^16 - 1, n_states) kernel( @@ -522,8 +561,16 @@ function large_sparse_bellman_kernel!( value_lt, action_reduce, ) where {Tv, T1, T2} - action_workspace = initialize_large_sparse_action_workspace(workspace, strategy_cache, marginal) - value_ws, gap_ws = initialize_large_sparse_value_and_gap(T1, T2, workspace, strategy_cache, V, marginal) + action_workspace = + initialize_large_sparse_action_workspace(workspace, strategy_cache, marginal) + value_ws, gap_ws = initialize_large_sparse_value_and_gap( + T1, + T2, + workspace, + strategy_cache, + V, + marginal, + ) jₛ = blockIdx().x @inbounds while jₛ <= source_shape(marginal)[1] # Grid-stride loop @@ -548,16 +595,17 @@ end @inline function initialize_large_sparse_action_workspace( workspace, ::OptimizingActiveCache, - marginal + marginal, ) - action_workspace = CuDynamicSharedArray(IntervalMDP.valuetype(marginal), workspace.num_actions) + action_workspace = + CuDynamicSharedArray(IntervalMDP.valuetype(marginal), workspace.num_actions) return action_workspace end @inline function initialize_large_sparse_action_workspace( workspace, ::NonOptimizingActiveCache, - marginal + marginal, ) return nothing end @@ -568,12 +616,17 @@ end workspace, ::OptimizingActiveCache, V, - marginal + marginal, ) where {T1, T2} Tv = IntervalMDP.valuetype(marginal) - value_ws = CuDynamicSharedArray(T1, workspace.max_support, workspace.num_actions * sizeof(Tv)) - gap_ws = CuDynamicSharedArray(T2, workspace.max_support, workspace.num_actions * sizeof(Tv) + workspace.max_support * sizeof(T1)) + value_ws = + CuDynamicSharedArray(T1, workspace.max_support, workspace.num_actions * sizeof(Tv)) + gap_ws = CuDynamicSharedArray( + 
T2, + workspace.max_support, + workspace.num_actions * sizeof(Tv) + workspace.max_support * sizeof(T1), + ) return value_ws, gap_ws end @@ -584,11 +637,12 @@ end workspace, ::OptimizingActiveCache, V, - marginal + marginal, ) where {T1} Tv = IntervalMDP.valuetype(marginal) - value_ws = CuDynamicSharedArray(T1, workspace.max_support, workspace.num_actions * sizeof(Tv)) + value_ws = + CuDynamicSharedArray(T1, workspace.max_support, workspace.num_actions * sizeof(Tv)) return value_ws, nothing end @@ -599,10 +653,11 @@ end workspace, ::NonOptimizingActiveCache, V, - marginal + marginal, ) where {T1, T2} value_ws = CuDynamicSharedArray(T1, workspace.max_support) - gap_ws = CuDynamicSharedArray(T2, workspace.max_support, workspace.max_support * sizeof(T1)) + gap_ws = + CuDynamicSharedArray(T2, workspace.max_support, workspace.max_support * sizeof(T1)) return value_ws, gap_ws end @@ -613,7 +668,7 @@ end workspace, ::NonOptimizingActiveCache, V, - marginal + marginal, ) where {T1} value_ws = CuDynamicSharedArray(T1, workspace.max_support) @@ -639,13 +694,7 @@ end ambiguity_set = marginal[(jₐ,), (jₛ,)] # Use O-maxmization to find the value for the action - v = state_action_sparse_omaximization!( - value_ws, - gap_ws, - V, - ambiguity_set, - value_lt - ) + v = state_action_sparse_omaximization!(value_ws, gap_ws, V, ambiguity_set, value_lt) if threadIdx().x == one(Int32) action_workspace[jₐ] = v @@ -691,13 +740,7 @@ end ambiguity_set = marginal[jₐ, (jₛ,)] # Use O-maxmization to find the value for the action - v = state_action_sparse_omaximization!( - value_ws, - gap_ws, - V, - ambiguity_set, - value_lt, - ) + v = state_action_sparse_omaximization!(value_ws, gap_ws, V, ambiguity_set, value_lt) if threadIdx().x == one(Int32) Vres[jₛ] = v @@ -725,10 +768,7 @@ end return value end -@inline function add_lower_mul_V_block( - V::AbstractVector{R}, - ambiguity_set, -) where {R} +@inline function add_lower_mul_V_block(V::AbstractVector{R}, ambiguity_set) where {R} share_ws = 
CuStaticSharedArray(R, 1) supportsize = IntervalMDP.supportsize(ambiguity_set) @@ -762,12 +802,7 @@ end return lower_value, remaining end -@inline function ff_sparse_initialize_sorting_shared_memory!( - V, - ambiguity_set, - value, - prob, -) +@inline function ff_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value, prob) support = IntervalMDP.support(ambiguity_set) supportsize = IntervalMDP.supportsize(ambiguity_set) @@ -785,11 +820,7 @@ end sync_threads() end -@inline function ff_add_gap_mul_V_sparse( - value, - prob, - remaining::Tv, -) where {Tv} +@inline function ff_add_gap_mul_V_sparse(value, prob, remaining::Tv) where {Tv} assume(warpsize() == 32) wid, lane = fldmod1(threadIdx().x, warpsize()) reduction_ws = CuStaticSharedArray(Tv, 32) @@ -849,7 +880,7 @@ end perm::AbstractVector{Int32}, V, ambiguity_set, - value_lt + value_lt, ) where {Tv} fi_sparse_initialize_sorting_shared_memory!(V, ambiguity_set, value, perm) @@ -945,7 +976,7 @@ end Pperm::AbstractVector{Int32}, V, ambiguity_set, - value_lt + value_lt, ) ii_sparse_initialize_sorting_shared_memory!(ambiguity_set, Vperm, Pperm) @@ -1037,13 +1068,12 @@ end return gap_value end - @inline function state_action_sparse_omaximization!( perm::AbstractVector{Int32}, ::Nothing, V, ambiguity_set, - value_lt + value_lt, ) i_sparse_initialize_sorting_shared_memory!(ambiguity_set, perm) diff --git a/ext/cuda/probabilities.jl b/ext/cuda/probabilities.jl index f9436105..9b804521 100644 --- a/ext/cuda/probabilities.jl +++ b/ext/cuda/probabilities.jl @@ -1,18 +1,27 @@ -function IntervalMDP.compute_gap( - lower::M, - upper::M, -) where {Tv, M <: CuSparseMatrixCSC{Tv}} +function IntervalMDP.compute_gap(lower::M, upper::M) where {Tv, M <: CuSparseMatrixCSC{Tv}} # FIXME: This is an ugly, non-robust hack. 
upper = SparseMatrixCSC(upper) lower = SparseMatrixCSC(lower) lower, gap = IntervalMDP.compute_gap(lower, upper) - return adapt(IntervalMDP.CuModelAdaptor{Tv}, lower), adapt(IntervalMDP.CuModelAdaptor{Tv}, gap) + return adapt(IntervalMDP.CuModelAdaptor{Tv}, lower), + adapt(IntervalMDP.CuModelAdaptor{Tv}, gap) end -const CuSparseDeviceColumnView{Tv, Ti} = SubArray{Tv, 1, <:CuSparseDeviceMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} -IntervalMDP.support(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = rowvals(p.gap) -IntervalMDP.supportsize(p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}) where {R} = nnz(p.gap) +const CuSparseDeviceColumnView{Tv, Ti} = SubArray{ + Tv, + 1, + <:CuSparseDeviceMatrixCSC{Tv, Ti}, + Tuple{Base.Slice{Base.OneTo{Int}}, Int}, +} +IntervalMDP.support( + p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}, +) where {R} = rowvals(p.gap) +IntervalMDP.supportsize( + p::IntervalMDP.IntervalAmbiguitySet{R, <:CuSparseDeviceColumnView{R}}, +) where {R} = nnz(p.gap) -IntervalMDP.maxsupportsize(p::IntervalMDP.IntervalAmbiguitySets{R, <:CuSparseMatrixCSC{R}}) where {R} = maxdiff(SparseArrays.getcolptr(p.gap)) +IntervalMDP.maxsupportsize( + p::IntervalMDP.IntervalAmbiguitySets{R, <:CuSparseMatrixCSC{R}}, +) where {R} = maxdiff(SparseArrays.getcolptr(p.gap)) diff --git a/ext/cuda/strategy.jl b/ext/cuda/strategy.jl index d5bac99d..add95523 100644 --- a/ext/cuda/strategy.jl +++ b/ext/cuda/strategy.jl @@ -7,7 +7,8 @@ Adapt.@adapt_structure NoStrategyActiveCache return NoStrategyActiveCache() end -struct TimeVaryingStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: OptimizingActiveCache +struct TimeVaryingStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: + OptimizingActiveCache cur_strategy::V end Adapt.@adapt_structure TimeVaryingStrategyActiveCache @@ -15,7 +16,8 @@ Adapt.@adapt_structure TimeVaryingStrategyActiveCache return 
TimeVaryingStrategyActiveCache(strategy_cache.cur_strategy) end -struct StationaryStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: OptimizingActiveCache +struct StationaryStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: + OptimizingActiveCache strategy::V end Adapt.@adapt_structure StationaryStrategyActiveCache @@ -25,7 +27,8 @@ end abstract type NonOptimizingActiveCache <: ActiveCache end -struct GivenStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: NonOptimizingActiveCache +struct GivenStrategyActiveCache{N, V <: AbstractVector{NTuple{N, Int32}}} <: + NonOptimizingActiveCache strategy::V end Adapt.@adapt_structure GivenStrategyActiveCache diff --git a/ext/cuda/workspace.jl b/ext/cuda/workspace.jl index f54d791b..c885f4bd 100644 --- a/ext/cuda/workspace.jl +++ b/ext/cuda/workspace.jl @@ -11,7 +11,7 @@ IntervalMDP.construct_workspace( prob::IntervalAmbiguitySets{R, MR}, ::OMaximization = IntervalMDP.default_bellman_algorithm(prob); num_actions = 1, - kwargs... + kwargs..., ) where {R, MR <: AbstractGPUMatrix{R}} = CuDenseOMaxWorkspace(num_actions) #################### @@ -31,5 +31,5 @@ IntervalMDP.construct_workspace( prob::IntervalAmbiguitySets{R, MR}, ::OMaximization = IntervalMDP.default_bellman_algorithm(prob); num_actions = 1, - kwargs... + kwargs..., ) where {R, MR <: AbstractCuSparseMatrix{R}} = CuSparseOMaxWorkspace(prob, num_actions) diff --git a/src/Data/bmdp-tool.jl b/src/Data/bmdp-tool.jl index d72428ef..3994b475 100644 --- a/src/Data/bmdp-tool.jl +++ b/src/Data/bmdp-tool.jl @@ -68,11 +68,19 @@ function read_bmdp_tool_file(path) state_action_probs_upper = spzeros(Float64, Int32, num_states) if src != jₛ - 1 - throw(ArgumentError("Transitions file is not sorted by source index or the number of actions was less than expected. Expected source index $(jₛ - 1), got $src.")) + throw( + ArgumentError( + "Transitions file is not sorted by source index or the number of actions was less than expected. 
Expected source index $(jₛ - 1), got $src.", + ), + ) end if act != jₐ - 1 - throw(ArgumentError("Transitions file is not sorted by action index or the number of actions was less than expected. Expected action index $(jₐ - 1), got $act.")) + throw( + ArgumentError( + "Transitions file is not sorted by action index or the number of actions was less than expected. Expected action index $(jₐ - 1), got $act.", + ), + ) end while src == jₛ - 1 && act == jₐ - 1 diff --git a/src/Data/intervalmdp.jl b/src/Data/intervalmdp.jl index 3e2acbed..f48f135d 100644 --- a/src/Data/intervalmdp.jl +++ b/src/Data/intervalmdp.jl @@ -60,7 +60,7 @@ function read_intervalmdp_jl_model(model_path) upper_nzval, ) - prob = IntervalAmbiguitySets(; lower=P̲, upper=P̅) + prob = IntervalAmbiguitySets(; lower = P̲, upper = P̅) stateptr = convert.(Int32, dataset["stateptr"][:]) num_actions = diff(stateptr) if any(num_actions .!= num_actions[1]) @@ -174,9 +174,19 @@ Write an `IntervalMarkovDecisionProcess` to an IntervalMDP.jl system file (netCD See [Data storage formats](@ref) for more information on the file format. 
""" write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP; deflate_level = 5) = - _write_intervalmdp_jl_model(model_path, mdp, IntervalMDP.modeltype(mdp); deflate_level = deflate_level) + _write_intervalmdp_jl_model( + model_path, + mdp, + IntervalMDP.modeltype(mdp); + deflate_level = deflate_level, + ) -function _write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP, ::IntervalMDP.IsIMDP; deflate_level) +function _write_intervalmdp_jl_model( + model_path, + mdp::IntervalMDP.FactoredRMDP, + ::IntervalMDP.IsIMDP; + deflate_level, +) Dataset(model_path, "c") do dataset dataset.attrib["model"] = "imdp" dataset.attrib["format"] = "sparse_csc" @@ -189,7 +199,13 @@ function _write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP, istates = Int32[] end defDim(dataset, "initial_states", length(istates)) - v = defVar(dataset, "initial_states", Int32, ("initial_states",); deflatelevel = deflate_level) + v = defVar( + dataset, + "initial_states", + Int32, + ("initial_states",); + deflatelevel = deflate_level, + ) v[:] = istates marginal = marginals(mdp)[1] @@ -198,11 +214,23 @@ function _write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP, g = as.gap defDim(dataset, "lower_colptr", length(l.colptr)) - v = defVar(dataset, "lower_colptr", Int32, ("lower_colptr",); deflatelevel = deflate_level) + v = defVar( + dataset, + "lower_colptr", + Int32, + ("lower_colptr",); + deflatelevel = deflate_level, + ) v[:] = l.colptr defDim(dataset, "lower_rowval", length(l.rowval)) - v = defVar(dataset, "lower_rowval", Int32, ("lower_rowval",); deflatelevel = deflate_level) + v = defVar( + dataset, + "lower_rowval", + Int32, + ("lower_rowval",); + deflatelevel = deflate_level, + ) v[:] = l.rowval defDim(dataset, "lower_nzval", length(l.nzval)) @@ -216,11 +244,23 @@ function _write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP, v[:] = l.nzval defDim(dataset, "upper_colptr", length(g.colptr)) - v = defVar(dataset, 
"upper_colptr", Int32, ("upper_colptr",); deflatelevel = deflate_level) + v = defVar( + dataset, + "upper_colptr", + Int32, + ("upper_colptr",); + deflatelevel = deflate_level, + ) v[:] = g.colptr defDim(dataset, "upper_rowval", length(g.rowval)) - v = defVar(dataset, "upper_rowval", Int32, ("upper_rowval",); deflatelevel = deflate_level) + v = defVar( + dataset, + "upper_rowval", + Int32, + ("upper_rowval",); + deflatelevel = deflate_level, + ) v[:] = g.rowval defDim(dataset, "upper_nzval", length(g.nzval)) @@ -232,7 +272,7 @@ function _write_intervalmdp_jl_model(model_path, mdp::IntervalMDP.FactoredRMDP, deflatelevel = deflate_level, ) v[:] = l.nzval + g.nzval - + defDim(dataset, "stateptr", source_shape(marginal)[1] + 1) v = defVar(dataset, "stateptr", Int32, ("stateptr",)) v[:] = [[Int32(1)]; (1:num_states(mdp)) .* Int32(num_actions(mdp)) .+ 1] diff --git a/src/Data/prism.jl b/src/Data/prism.jl index f0fe50c5..ce708c5e 100644 --- a/src/Data/prism.jl +++ b/src/Data/prism.jl @@ -53,8 +53,13 @@ function write_prism_file( write_prism_spec(lab_path, srew_path, pctl_path, mdp, spec) end -write_prism_states_file(sta_path, mdp::IntervalMDP.FactoredRMDP) = _write_prism_states_file(sta_path, mdp, IntervalMDP.modeltype(mdp)) -function _write_prism_states_file(sta_path, mdp::IntervalMDP.FactoredRMDP, ::IntervalMDP.NonFactored) +write_prism_states_file(sta_path, mdp::IntervalMDP.FactoredRMDP) = + _write_prism_states_file(sta_path, mdp, IntervalMDP.modeltype(mdp)) +function _write_prism_states_file( + sta_path, + mdp::IntervalMDP.FactoredRMDP, + ::IntervalMDP.NonFactored, +) number_states = num_states(mdp) open(sta_path, "w") do io @@ -67,10 +72,23 @@ function _write_prism_states_file(sta_path, mdp::IntervalMDP.FactoredRMDP, ::Int end end -write_prism_transitions_file(tra_path, mdp::IntervalMDP.FactoredRMDP; lb_threshold = 1e-12) = - _write_prism_transitions_file(tra_path, mdp, IntervalMDP.modeltype(mdp); lb_threshold = lb_threshold) +write_prism_transitions_file( + 
tra_path, + mdp::IntervalMDP.FactoredRMDP; + lb_threshold = 1e-12, +) = _write_prism_transitions_file( + tra_path, + mdp, + IntervalMDP.modeltype(mdp); + lb_threshold = lb_threshold, +) -function _write_prism_transitions_file(tra_path, mdp::IntervalMDP.FactoredRMDP, ::IntervalMDP.IsIMDP; lb_threshold) +function _write_prism_transitions_file( + tra_path, + mdp::IntervalMDP.FactoredRMDP, + ::IntervalMDP.IsIMDP; + lb_threshold, +) marginal = marginals(mdp)[1] num_transitions = nnz(ambiguity_sets(marginal).lower) # Number of non-zero entries in the lower bound matrix @@ -104,11 +122,7 @@ function write_prism_spec(lab_path, srew_path, pctl_path, mdp, spec) write_prism_props_file(pctl_path, spec) end -function write_prism_labels_file( - lab_path, - mdp, - prop::IntervalMDP.AbstractReachability, -) +function write_prism_labels_file(lab_path, mdp, prop::IntervalMDP.AbstractReachability) istates = initial_states(mdp) target_states = reach(prop) @@ -165,11 +179,7 @@ function write_prism_labels_file(lab_path, mdp, prop::IntervalMDP.AbstractReward end end -function write_prism_rewards_file( - lab_path, - mdp, - prop::IntervalMDP.AbstractReachability, -) +function write_prism_rewards_file(lab_path, mdp, prop::IntervalMDP.AbstractReachability) # Do nothing - no rewards for reachability return nothing end @@ -286,7 +296,11 @@ function read_prism_transitions_file(tra_path, num_states) read_prism_transitions_file_header(readline(io)) if num_states != num_states_t - throw(DimensionMismatch("Number of states in .sta file ($num_states) does not match number of states in .tra file ($num_states_t).")) + throw( + DimensionMismatch( + "Number of states in .sta file ($num_states) does not match number of states in .tra file ($num_states_t).", + ), + ) end if num_choices <= 0 @@ -294,11 +308,19 @@ function read_prism_transitions_file(tra_path, num_states) end if num_transitions <= 0 - throw(ArgumentError("Number of transitions must be positive, was $num_transitions.")) + throw( + 
ArgumentError( + "Number of transitions must be positive, was $num_transitions.", + ), + ) end if num_choices % num_states_t != 0 - throw(ArgumentError("Number of choices ($num_choices) must be a multiple of the number of states ($num_states_t).")) + throw( + ArgumentError( + "Number of choices ($num_choices) must be a multiple of the number of states ($num_states_t).", + ), + ) end num_actions = num_choices ÷ num_states_t num_src_states = num_choices ÷ num_actions @@ -320,13 +342,21 @@ function read_prism_transitions_file(tra_path, num_states) for jₐ in 1:num_actions state_action_probs_lower = spzeros(Float64, Int32, num_states) state_action_probs_upper = spzeros(Float64, Int32, num_states) - + if src != jₛ - 1 - throw(ArgumentError("Transitions file is not sorted by source index or the number of actions was less than expected. Expected source index $(jₛ - 1), got $src.")) + throw( + ArgumentError( + "Transitions file is not sorted by source index or the number of actions was less than expected. Expected source index $(jₛ - 1), got $src.", + ), + ) end if act != jₐ - 1 - throw(ArgumentError("Transitions file is not sorted by action index or the number of actions was less than expected. Expected action index $(jₐ - 1), got $act.")) + throw( + ArgumentError( + "Transitions file is not sorted by action index or the number of actions was less than expected. 
Expected action index $(jₐ - 1), got $act.", + ), + ) end while src == jₛ - 1 && act == jₐ - 1 diff --git a/src/algorithms.jl b/src/algorithms.jl index 19de7506..42531514 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -5,26 +5,48 @@ Base.@kwdef struct LPMcCormickRelaxation{O} <: BellmanAlgorithm end struct VertexEnumeration <: BellmanAlgorithm end -default_bellman_algorithm(pp::ProductProcess) = default_bellman_algorithm(markov_process(pp)) -default_bellman_algorithm(mdp::FactoredRMDP) = default_bellman_algorithm(mdp, modeltype(mdp)) +default_bellman_algorithm(pp::ProductProcess) = + default_bellman_algorithm(markov_process(pp)) +default_bellman_algorithm(mdp::FactoredRMDP) = + default_bellman_algorithm(mdp, modeltype(mdp)) default_bellman_algorithm(::FactoredRMDP, ::IsIMDP) = OMaximization() default_bellman_algorithm(::FactoredRMDP, ::IsFIMDP) = LPMcCormickRelaxation() default_bellman_algorithm(::IntervalAmbiguitySets) = OMaximization() -function showbellmanalg(io::IO, prefix, ::IsIMDP,::OMaximization) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:O-Maximization}") +function showbellmanalg(io::IO, prefix, ::IsIMDP, ::OMaximization) + println( + io, + prefix, + "└─", + styled"Default Bellman operator algorithm: {green:O-Maximization}", + ) end -function showbellmanalg(io::IO, prefix, ::IsFIMDP,::OMaximization) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Recursive O-Maximization}") +function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::OMaximization) + println( + io, + prefix, + "└─", + styled"Default Bellman operator algorithm: {green:Recursive O-Maximization}", + ) end function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::LPMcCormickRelaxation) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Binary tree LP McCormick Relaxation}") + println( + io, + prefix, + "└─", + styled"Default Bellman operator algorithm: {green:Binary tree LP McCormick Relaxation}", + ) end 
function showbellmanalg(io::IO, prefix, ::IsFIMDP, ::VertexEnumeration) - println(io, prefix, "└─", styled"Default Bellman operator algorithm: {green:Vertex Enumeration}") + println( + io, + prefix, + "└─", + styled"Default Bellman operator algorithm: {green:Vertex Enumeration}", + ) end function showbellmanalg(io::IO, prefix, _, ::BellmanAlgorithm) @@ -59,16 +81,21 @@ struct IntervalValueIteration <: ModelCheckingAlgorithm end ##### Default algorithm for solving Interval MDP problems default_algorithm(problem::AbstractIntervalMDPProblem) = default_algorithm(system(problem)) -default_algorithm(system::StochasticProcess) = RobustValueIteration(default_bellman_algorithm(system)) +default_algorithm(system::StochasticProcess) = + RobustValueIteration(default_bellman_algorithm(system)) solve(problem::AbstractIntervalMDPProblem; kwargs...) = solve(problem, default_algorithm(problem); kwargs...) - function showmcalgorithm(io::IO, prefix, ::RobustValueIteration) - println(io, prefix,"├─", styled"Default model checking algorithm: {green:Robust Value Iteration}") + println( + io, + prefix, + "├─", + styled"Default model checking algorithm: {green:Robust Value Iteration}", + ) end function showmcalgorithm(io::IO, prefix, ::ModelCheckingAlgorithm) - println(io, prefix,"├─", styled"Default model checking algorithm: {green:None}") -end \ No newline at end of file + println(io, prefix, "├─", styled"Default model checking algorithm: {green:None}") +end diff --git a/src/bellman.jl b/src/bellman.jl index eac8348f..b5709874 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -71,7 +71,13 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) For a hot-loop, it is more efficient to use `bellman!` and pass in pre-allocated objects. 
""" -function bellman(V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) +function bellman( + V, + model, + alg = default_bellman_algorithm(model); + upper_bound = false, + maximize = true, +) Vres = similar(V, source_shape(model)) return bellman!(Vres, V, model, alg; upper_bound = upper_bound, maximize = maximize) @@ -156,7 +162,14 @@ IntervalMDP.bellman!(workspace, strategy_cache, Vcur, Vprev, model; upper_bound """ function bellman! end -function bellman!(Vres, V, model, alg=default_bellman_algorithm(model); upper_bound = false, maximize = true) +function bellman!( + Vres, + V, + model, + alg = default_bellman_algorithm(model); + upper_bound = false, + maximize = true, +) workspace = construct_workspace(model, alg) strategy_cache = construct_strategy_cache(model) @@ -293,7 +306,10 @@ end # Threaded function _bellman_helper!( - workspace::Union{ThreadedDenseIntervalOMaxWorkspace, ThreadedSparseIntervalOMaxWorkspace}, + workspace::Union{ + ThreadedDenseIntervalOMaxWorkspace, + ThreadedSparseIntervalOMaxWorkspace, + }, strategy_cache::AbstractStrategyCache, Vres, V, @@ -307,16 +323,7 @@ function _bellman_helper!( @threadstid tid for jₛ in CartesianIndices(source_shape(marginal)) @inbounds ws = workspace[tid] - state_bellman!( - ws, - strategy_cache, - Vres, - V, - marginal, - jₛ, - upper_bound, - maximize, - ) + state_bellman!(ws, strategy_cache, Vres, V, marginal, jₛ, upper_bound, maximize) end return Vres @@ -351,7 +358,8 @@ function state_bellman!( for jₐ in CartesianIndices(action_shape(marginal)) ambiguity_set = marginal[jₐ, jₛ] budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] - workspace.actions[jₐ] = state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) + workspace.actions[jₐ] = + state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) @@ -383,7 +391,8 @@ Base.@propagate_inbounds function 
state_action_bellman( budget, upper_bound, ) - return dot(V, lower(ambiguity_set)) + gap_value(V, gap(ambiguity_set), budget, permutation(workspace)) + return dot(V, lower(ambiguity_set)) + + gap_value(V, gap(ambiguity_set), budget, permutation(workspace)) end Base.@propagate_inbounds function gap_value( @@ -460,16 +469,7 @@ function _bellman_helper!( maximize = true, ) for jₛ in CartesianIndices(source_shape(model)) - state_bellman!( - workspace, - strategy_cache, - Vres, - V, - model, - jₛ, - upper_bound, - maximize, - ) + state_bellman!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) end return Vres @@ -487,22 +487,12 @@ function _bellman_helper!( ) @threadstid tid for jₛ in CartesianIndices(source_shape(model)) @inbounds ws = workspace[tid] - state_bellman!( - ws, - strategy_cache, - Vres, - V, - model, - jₛ, - upper_bound, - maximize, - ) + state_bellman!(ws, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) end return Vres end - function state_bellman!( workspace::FactoredIntervalMcCormickWorkspace, strategy_cache::OptimizingStrategyCache, @@ -516,7 +506,8 @@ function state_bellman!( @inbounds begin for jₐ in CartesianIndices(action_shape(model)) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) - workspace.actions[jₐ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + workspace.actions[jₐ] = + state_action_bellman(workspace, V, ambiguity_sets, upper_bound) end Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) @@ -550,7 +541,7 @@ Base.@propagate_inbounds function state_action_bellman( model = workspace.model JuMP.empty!(model) - + # Recursively add McCormick variables and constraints for each ambiguity set p, _, _ = mccormick_branch(model, ambiguity_sets) @@ -587,7 +578,7 @@ function mccormick_branch(model, ambiguity_sets) else mid = fld(length(ambiguity_sets), 2) + 1 p, p_lower, p_upper = mccormick_branch(model, ambiguity_sets[1:mid]) - q, q_lower, q_upper = 
mccormick_branch(model, ambiguity_sets[mid+1:end]) + q, q_lower, q_upper = mccormick_branch(model, ambiguity_sets[(mid + 1):end]) end # McCormick envelopes @@ -600,11 +591,31 @@ function mccormick_branch(model, ambiguity_sets) w_lower[I, J] = p_lower[I] * q_lower[J] w_upper[I, J] = p_upper[I] * q_upper[J] - w[I, J] = @variable(model, lower_bound = w_lower[I, J], upper_bound = w_upper[I, J]) - @constraint(model, w[I, J] >= p[I] * q_lower[J] + q[J] * p_lower[I] − p_lower[I] * q_lower[J]) - @constraint(model, w[I, J] >= p[I] * q_upper[J] + q[J] * p_upper[I] − p_upper[I] * q_upper[J]) - @constraint(model, w[I, J] <= p[I] * q_upper[J] + q[J] * p_lower[I] − p_lower[I] * q_upper[J]) - @constraint(model, w[I, J] <= p[I] * q_lower[J] + q[J] * p_upper[I] − p_upper[I] * q_lower[J]) + w[I, J] = @variable( + model, + lower_bound = w_lower[I, J], + upper_bound = w_upper[I, J] + ) + @constraint( + model, + w[I, J] >= + p[I] * q_lower[J] + q[J] * p_lower[I] − p_lower[I] * q_lower[J] + ) + @constraint( + model, + w[I, J] >= + p[I] * q_upper[J] + q[J] * p_upper[I] − p_upper[I] * q_upper[J] + ) + @constraint( + model, + w[I, J] <= + p[I] * q_upper[J] + q[J] * p_lower[I] − p_lower[I] * q_upper[J] + ) + @constraint( + model, + w[I, J] <= + p[I] * q_lower[J] + q[J] * p_upper[I] − p_upper[I] * q_lower[J] + ) end end @constraint(model, sum(w) == one(eltype(p_lower))) @@ -613,7 +624,6 @@ function mccormick_branch(model, ambiguity_sets) end end - #################################################### # O-Maximization-based Bellman operator for fIMDPs # #################################################### @@ -684,8 +694,16 @@ function state_bellman!( @inbounds begin for jₐ in CartesianIndices(action_shape(model)) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) - budgets = ntuple(r -> workspace.budgets[r][sub2ind(marginals(model)[r], jₐ, jₛ)], N) - workspace.actions[jₐ] = state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) + budgets = + ntuple(r -> 
workspace.budgets[r][sub2ind(marginals(model)[r], jₐ, jₛ)], N) + workspace.actions[jₐ] = state_action_bellman( + workspace, + V, + model, + ambiguity_sets, + budgets, + upper_bound, + ) end Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) @@ -706,7 +724,8 @@ function state_bellman!( jₐ = CartesianIndex(strategy_cache[jₛ]) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) budgets = ntuple(r -> workspace.budgets[r][sub2ind(marginals(model)[r], jₐ, jₛ)], N) - Vres[jₛ] = state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) + Vres[jₛ] = + state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) end end @@ -728,7 +747,7 @@ Base.@propagate_inbounds function state_action_bellman( @view(V[:, I]), ambiguity_sets[1], budgets[1], - upper_bound + upper_bound, ) Vₑ[1][I[1]] = v @@ -750,7 +769,13 @@ Base.@propagate_inbounds function state_action_bellman( end # Last dimension - v = orthogonal_inner_bellman!(workspace, Vₑ[end], ambiguity_sets[end], budgets[end], upper_bound) + v = orthogonal_inner_bellman!( + workspace, + Vₑ[end], + ambiguity_sets[end], + budgets[end], + upper_bound, + ) return v end @@ -773,7 +798,6 @@ Base.@propagate_inbounds function orthogonal_inner_bellman!( return dot(V, lower(ambiguity_set)) + gap_value(Vp_workspace, budget) end - ########################################################## # Vertex enumeration-based Bellman operator for fIMDPs # ########################################################## @@ -789,16 +813,7 @@ function _bellman_helper!( maximize = true, ) for jₛ in CartesianIndices(source_shape(model)) - state_bellman!( - workspace, - strategy_cache, - Vres, - V, - model, - jₛ, - upper_bound, - maximize, - ) + state_bellman!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) end return Vres @@ -816,16 +831,7 @@ function _bellman_helper!( ) @threadstid tid for jₛ in CartesianIndices(source_shape(model)) @inbounds ws = workspace[tid] - 
state_bellman!( - ws, - strategy_cache, - Vres, - V, - model, - jₛ, - upper_bound, - maximize, - ) + state_bellman!(ws, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) end return Vres @@ -844,7 +850,8 @@ function state_bellman!( @inbounds begin for jₐ in CartesianIndices(action_shape(model)) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) - workspace.actions[jₐ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + workspace.actions[jₐ] = + state_action_bellman(workspace, V, ambiguity_sets, upper_bound) end Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) @@ -880,9 +887,12 @@ Base.@propagate_inbounds function state_action_bellman( optfunc = upper_bound ? max : min for marginal_vertices in Iterators.product(iterators...) - v = sum(V[I] * prod(r -> marginal_vertices[r][I[r]], eachindex(ambiguity_sets)) for I in CartesianIndices(num_target.(ambiguity_sets))) + v = sum( + V[I] * prod(r -> marginal_vertices[r][I[r]], eachindex(ambiguity_sets)) for + I in CartesianIndices(num_target.(ambiguity_sets)) + ) optval = optfunc(optval, v) end return optval -end \ No newline at end of file +end diff --git a/src/models/DFA.jl b/src/models/DFA.jl index 1fd1e422..8e828b49 100644 --- a/src/models/DFA.jl +++ b/src/models/DFA.jl @@ -33,7 +33,7 @@ struct DFA{T <: TransitionFunction, DA <: AbstractDict{String, Int32}} <: # TODO: Add explicit sink states for non-accepting self-looping states since we do not need to iterate for these. # TODO: Detection of non-accepting end components. They can be replaced by a single state. 
- + function DFA( transition::T, initial_state::Int32, @@ -174,4 +174,4 @@ function showsystem(io::IO, first_prefix, prefix, dfa::DFA) println(io, prefix, styled"├─ Number of states: {magenta:$(num_states(dfa))}") println(io, prefix, styled"├─ Number of labels: {magenta:$(num_labels(dfa))}") println(io, prefix, styled"└─ Initial state: {magenta:$(initial_state(dfa))}") -end \ No newline at end of file +end diff --git a/src/models/FactoredRobustMarkovDecisionProcess.jl b/src/models/FactoredRobustMarkovDecisionProcess.jl index b8c910af..71669606 100644 --- a/src/models/FactoredRobustMarkovDecisionProcess.jl +++ b/src/models/FactoredRobustMarkovDecisionProcess.jl @@ -128,8 +128,8 @@ struct FactoredRobustMarkovDecisionProcess{ state_vars::NTuple{N, Int32} # N is the number of state variables and state_vars[n] is the number of states for state variable n action_vars::NTuple{M, Int32} # M is the number of action variables and action_vars[m] is the number of actions for action variable m - source_dims::NTuple{N, Int32} - + source_dims::NTuple{N, Int32} + transition::P initial_states::VI @@ -143,7 +143,13 @@ struct FactoredRobustMarkovDecisionProcess{ ) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} check_rmdp(state_vars, action_vars, source_dims, transition, initial_states) - return new{N, M, P, VI}(state_vars, action_vars, source_dims, transition, initial_states) + return new{N, M, P, VI}( + state_vars, + action_vars, + source_dims, + transition, + initial_states, + ) end function FactoredRobustMarkovDecisionProcess( @@ -154,7 +160,13 @@ struct FactoredRobustMarkovDecisionProcess{ initial_states::VI, check::Val{false}, ) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} - return new{N, M, P, VI}(state_vars, action_vars, source_dims, transition, initial_states) + return new{N, M, P, VI}( + state_vars, + action_vars, + source_dims, + transition, + initial_states, + ) end end const FactoredRMDP = FactoredRobustMarkovDecisionProcess @@ -166,7 +178,14 
@@ function FactoredRMDP( transition::P, initial_states::VI = AllStates(), ) where {N, M, P <: NTuple{N, Marginal}, VI <: InitialStates} - return FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, transition, initial_states, Val(true)) + return FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + transition, + initial_states, + Val(true), + ) end function FactoredRMDP( @@ -180,7 +199,13 @@ function FactoredRMDP( action_vars_32 = Int32.(action_vars) source_dims_32 = Int32.(source_dims) - return FactoredRobustMarkovDecisionProcess(state_vars_32, action_vars_32, source_dims_32, transition, initial_states) + return FactoredRobustMarkovDecisionProcess( + state_vars_32, + action_vars_32, + source_dims_32, + transition, + initial_states, + ) end function FactoredRMDP( @@ -189,7 +214,13 @@ function FactoredRMDP( transition::NTuple{N, Marginal}, initial_states::VI = AllStates(), ) where {N, M, VI <: InitialStates} - return FactoredRobustMarkovDecisionProcess(state_vars, action_vars, state_vars, transition, initial_states) + return FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + state_vars, + transition, + initial_states, + ) end function check_rmdp(state_vars, action_vars, source_dims, transition, initial_states) @@ -204,8 +235,15 @@ function check_state_values(state_vars, source_dims) throw(ArgumentError("All state variables must be positive integers.")) end - if any(i -> source_dims[i] <= 0 || source_dims[i] > state_vars[i], eachindex(state_vars)) - throw(ArgumentError("All source dimensions must be positive integers and less than or equal to the corresponding state variable.")) + if any( + i -> source_dims[i] <= 0 || source_dims[i] > state_vars[i], + eachindex(state_vars), + ) + throw( + ArgumentError( + "All source dimensions must be positive integers and less than or equal to the corresponding state variable.", + ), + ) end end @@ -218,17 +256,29 @@ end function check_transition(state_dims, 
action_dims, source_dims, transition) for (i, marginal) in enumerate(transition) if num_target(marginal) != state_dims[i] - throw(DimensionMismatch("Marginal $i has incorrect number of target states. Expected $(state_dims[i]), got $(num_target(marginal)).")) + throw( + DimensionMismatch( + "Marginal $i has incorrect number of target states. Expected $(state_dims[i]), got $(num_target(marginal)).", + ), + ) end expected_source_shape = getindex.((source_dims,), state_variables(marginal)) if source_shape(marginal) != expected_source_shape - throw(DimensionMismatch("Marginal $i has incorrect source shape. Expected $expected_source_shape, got $(source_shape(marginal)).")) + throw( + DimensionMismatch( + "Marginal $i has incorrect source shape. Expected $expected_source_shape, got $(source_shape(marginal)).", + ), + ) end expected_action_shape = getindex.((action_dims,), action_variables(marginal)) if action_shape(marginal) != expected_action_shape - throw(DimensionMismatch("Marginal $i has incorrect action shape. Expected $expected_action_shape, got $(action_shape(marginal)).")) + throw( + DimensionMismatch( + "Marginal $i has incorrect action shape. Expected $expected_action_shape, got $(action_shape(marginal)).", + ), + ) end end end @@ -245,7 +295,11 @@ function check_initial_states(state_vars, initial_states) end if !all(1 .<= initial_state .<= state_vars) - throw(DimensionMismatch("Each initial state must be within the valid range of states (should be 1 .<= initial_state <= $state_vars, was initial_state=$initial_state).")) + throw( + DimensionMismatch( + "Each initial state must be within the valid range of states (should be 1 .<= initial_state <= $state_vars, was initial_state=$initial_state).", + ), + ) end end end @@ -254,7 +308,7 @@ end state_values(mdp::FactoredRMDP) Return a tuple with the number of states for each state variable in the fRMDP. 
-""" +""" state_values(mdp::FactoredRMDP) = mdp.state_vars state_values(mdp::FactoredRMDP, r) = mdp.state_vars[r] @@ -307,13 +361,13 @@ modeltype(mdp::FactoredRMDP{N}) where {N} = modeltype(mdp, isinterval.(mdp.trans modeltype(::FactoredRMDP{N}, ::NTuple{N, IsInterval}) where {N} = IsFIMDP() # If not, check if all marginals are polytopic ambiguity sets -modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsInterval}) where {N} = modeltype(mdp, ispolytopic.(mdp.transition)) +modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsInterval}) where {N} = + modeltype(mdp, ispolytopic.(mdp.transition)) modeltype(::FactoredRMDP{N}, ::NTuple{N, IsPolytopic}) where {N} = IsFPMDP() # Otherwise, it is a general factored robust MDP modeltype(::FactoredRMDP{N}, ::NTuple{N, AbstractIsPolytopic}) where {N} = IsFRMDP() - ### Pretty printing function Base.show(io::IO, mime::MIME"text/plain", mdp::FactoredRMDP) showsystem(io, "", "", mdp) @@ -321,8 +375,20 @@ end function showsystem(io::IO, first_prefix, prefix, mdp::FactoredRMDP{N, M}) where {N, M} println(io, first_prefix, styled"{code:FactoredRobustMarkovDecisionProcess}") - println(io, prefix, "├─ ", N, styled" state variables with cardinality: {magenta:$(state_values(mdp))}") - println(io, prefix, "├─ ", M, styled" action variables with cardinality: {magenta:$(action_values(mdp))}") + println( + io, + prefix, + "├─ ", + N, + styled" state variables with cardinality: {magenta:$(state_values(mdp))}", + ) + println( + io, + prefix, + "├─ ", + M, + styled" action variables with cardinality: {magenta:$(action_values(mdp))}", + ) if initial_states(mdp) isa AllStates println(io, prefix, "├─ ", styled"Initial states: {magenta:All states}") else @@ -331,7 +397,7 @@ function showsystem(io::IO, first_prefix, prefix, mdp::FactoredRMDP{N, M}) where println(io, prefix, "├─ ", styled"Transition marginals:") marginal_prefix = prefix * "│ " - for (i, marginal) in enumerate(mdp.transition[1:end - 1]) + for (i, marginal) in 
enumerate(mdp.transition[1:(end - 1)]) println(io, marginal_prefix, "├─ Marginal $i: ") showmarginal(io, marginal_prefix * "│ ", marginal) end @@ -373,4 +439,4 @@ end function showmodeltype(io::IO, prefix, ::IsRMDP) println(io, prefix, "├─", styled"Model type: {green:Robust MDP}") -end \ No newline at end of file +end diff --git a/src/models/IntervalMarkovChain.jl b/src/models/IntervalMarkovChain.jl index 8a1b98e7..b3149d7a 100644 --- a/src/models/IntervalMarkovChain.jl +++ b/src/models/IntervalMarkovChain.jl @@ -1,9 +1,16 @@ -function IntervalMarkovChain(marginal::Marginal{<:IntervalAmbiguitySets}, initial_states=AllStates()) +function IntervalMarkovChain( + marginal::Marginal{<:IntervalAmbiguitySets}, + initial_states = AllStates(), +) state_vars = (Int32(num_target(marginal)),) source_dims = source_shape(marginal) if action_shape(marginal) != (1,) - throw(DimensionMismatch("The action shape of the marginal must be (1,) for an IntervalMarkovChain. Got $(action_shape(marginal)).")) + throw( + DimensionMismatch( + "The action shape of the marginal must be (1,) for an IntervalMarkovChain. 
Got $(action_shape(marginal)).", + ), + ) end action_vars = (Int32(1),) @@ -70,10 +77,13 @@ FactoredRobustMarkovDecisionProcess └─Default Bellman operator algorithm: O-Maximization ``` """ -function IntervalMarkovChain(ambiguity_set::IntervalAmbiguitySets, initial_states=AllStates()) +function IntervalMarkovChain( + ambiguity_set::IntervalAmbiguitySets, + initial_states = AllStates(), +) source_dims = (num_sets(ambiguity_set),) action_vars = (1,) marginal = Marginal(ambiguity_set, source_dims, action_vars) return IntervalMarkovChain(marginal, initial_states) -end \ No newline at end of file +end diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl index 1e638e10..7ec78349 100644 --- a/src/models/IntervalMarkovDecisionProcess.jl +++ b/src/models/IntervalMarkovDecisionProcess.jl @@ -1,16 +1,13 @@ -function IntervalMarkovDecisionProcess(marginal::Marginal{<:IntervalAmbiguitySets}, initial_states::InitialStates = AllStates()) +function IntervalMarkovDecisionProcess( + marginal::Marginal{<:IntervalAmbiguitySets}, + initial_states::InitialStates = AllStates(), +) state_vars = (Int32(num_target(marginal)),) action_vars = action_shape(marginal) source_dims = source_shape(marginal) transition = (marginal,) - return FactoredRMDP( - state_vars, - action_vars, - source_dims, - transition, - initial_states - ) + return FactoredRMDP(state_vars, action_vars, source_dims, transition, initial_states) end """ @@ -91,9 +88,17 @@ FactoredRobustMarkovDecisionProcess ``` """ -function IntervalMarkovDecisionProcess(ambiguity_set::IntervalAmbiguitySets, num_actions::Integer, initial_states::InitialStates = AllStates()) +function IntervalMarkovDecisionProcess( + ambiguity_set::IntervalAmbiguitySets, + num_actions::Integer, + initial_states::InitialStates = AllStates(), +) if num_sets(ambiguity_set) % num_actions != 0 - throw(ArgumentError("The number of sets in the ambiguity set must be a multiple of the number of actions.")) + throw( + 
ArgumentError( + "The number of sets in the ambiguity set must be a multiple of the number of actions.", + ), + ) end source_dims = (num_sets(ambiguity_set) ÷ num_actions,) @@ -167,7 +172,11 @@ function interval_prob_hcat( num_actions = num_sets(ps[1]) for (i, p) in enumerate(ps) if num_sets(p) != num_actions - throw(DimensionMismatch("All IntervalAmbiguitySets must have the same number of sets (actions). Expected $num_actions, was $(num_sets(p)) at index $i.")) + throw( + DimensionMismatch( + "All IntervalAmbiguitySets must have the same number of sets (actions). Expected $num_actions, was $(num_sets(p)) at index $i.", + ), + ) end end @@ -181,4 +190,4 @@ function interval_prob_hcat( marginal = Marginal(ambiguity_set, source_dims, action_vars) return marginal -end \ No newline at end of file +end diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index fa205b39..05520cb1 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -94,18 +94,25 @@ Return the labelling function of the product """ labelling_function(proc::ProductProcess) = proc.labelling_func -state_values(proc::ProductProcess) = (state_values(markov_process(proc))..., num_states(automaton(proc))) -source_shape(proc::ProductProcess) = (source_shape(markov_process(proc))..., num_states(automaton(proc))) +state_values(proc::ProductProcess) = + (state_values(markov_process(proc))..., num_states(automaton(proc))) +source_shape(proc::ProductProcess) = + (source_shape(markov_process(proc))..., num_states(automaton(proc))) action_values(proc::ProductProcess) = action_values(markov_process(proc)) action_shape(proc::ProductProcess) = action_shape(markov_process(proc)) Base.show(io::IO, proc::ProductProcess) = showsystem(io, "", "", proc) -function showsystem(io::IO, first_prefix, prefix, mdp::ProductProcess{M, D, L}) where {M, D, L} +function showsystem( + io::IO, + first_prefix, + prefix, + mdp::ProductProcess{M, D, L}, +) where {M, D, L} println(io, first_prefix, 
styled"{code:ProductProcess}") println(io, prefix, "├─ Underlying process:") showsystem(io, prefix * "│ ", prefix * "│ ", markov_process(mdp)) println(io, prefix, "├─ Automaton:") showsystem(io, prefix * "│ ", prefix * "│ ", automaton(mdp)) println(io, prefix, styled"└─ Labelling type: {magenta:$(L)}") # TODO: Improve printing of labelling function -end \ No newline at end of file +end diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 662182c6..8ee4ce53 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -62,18 +62,27 @@ struct IntervalAmbiguitySets{R, MR <: AbstractMatrix{R}} <: PolytopicAmbiguitySe lower::MR gap::MR - function IntervalAmbiguitySets(lower::MR, gap::MR, check::Val{true}) where {R, MR <: AbstractMatrix{R}} + function IntervalAmbiguitySets( + lower::MR, + gap::MR, + check::Val{true}, + ) where {R, MR <: AbstractMatrix{R}} checkprobabilities(lower, gap) return new{R, MR}(lower, gap) end - function IntervalAmbiguitySets(lower::MR, gap::MR, check::Val{false}) where {R, MR <: AbstractMatrix{R}} + function IntervalAmbiguitySets( + lower::MR, + gap::MR, + check::Val{false}, + ) where {R, MR <: AbstractMatrix{R}} return new{R, MR}(lower, gap) end end -IntervalAmbiguitySets(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} = IntervalAmbiguitySets(lower, gap, Val(true)) +IntervalAmbiguitySets(lower::MR, gap::MR) where {R, MR <: AbstractMatrix{R}} = + IntervalAmbiguitySets(lower, gap, Val(true)) # Keyword constructor from lower and upper function IntervalAmbiguitySets(; lower::MR, upper::MR) where {MR <: AbstractMatrix} @@ -121,11 +130,17 @@ function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) end if any(lower .< 0) - throw(ArgumentError("The lower bound transition probabilities must be non-negative.")) + throw( + ArgumentError("The lower bound transition probabilities must be non-negative."), + ) end if any(lower .> 1) - 
throw(ArgumentError("The lower bound transition probabilities must be less than or equal to 1.")) + throw( + ArgumentError( + "The lower bound transition probabilities must be less than or equal to 1.", + ), + ) end if any(gap .< 0) @@ -133,23 +148,39 @@ function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) end if any(gap .> 1) - throw(ArgumentError("The gap transition probabilities must be less than or equal to 1.")) + throw( + ArgumentError( + "The gap transition probabilities must be less than or equal to 1.", + ), + ) end if any(lower .+ gap .> 1) - throw(ArgumentError("The sum of lower and gap transition probabilities must be less than or equal to 1.")) + throw( + ArgumentError( + "The sum of lower and gap transition probabilities must be less than or equal to 1.", + ), + ) end sum_lower = vec(sum(lower; dims = 1)) max_lower_bound = maximum(sum_lower) if max_lower_bound > 1 - throw(ArgumentError("The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1.")) + throw( + ArgumentError( + "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1.", + ), + ) end sum_upper = sum_lower .+ vec(sum(gap; dims = 1)) max_upper_bound = minimum(sum_upper) if max_upper_bound < 1 - throw(ArgumentError("The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1.")) + throw( + ArgumentError( + "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1.", + ), + ) end end @@ -159,11 +190,17 @@ function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMat end if any(nonzeros(lower) .< 0) - throw(ArgumentError("The lower bound transition probabilities must be non-negative.")) + throw( + ArgumentError("The lower bound transition probabilities must be non-negative."), + ) end if any(nonzeros(lower) .> 1) - 
throw(ArgumentError("The lower bound transition probabilities must be less than or equal to 1.")) + throw( + ArgumentError( + "The lower bound transition probabilities must be less than or equal to 1.", + ), + ) end if any(nonzeros(gap) .< 0) @@ -171,23 +208,39 @@ function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMat end if any(nonzeros(gap) .> 1) - throw(ArgumentError("The gap transition probabilities must be less than or equal to 1.")) + throw( + ArgumentError( + "The gap transition probabilities must be less than or equal to 1.", + ), + ) end if any(nonzeros(lower) .+ nonzeros(gap) .> 1) - throw(ArgumentError("The sum of lower and gap transition probabilities must be less than or equal to 1.")) + throw( + ArgumentError( + "The sum of lower and gap transition probabilities must be less than or equal to 1.", + ), + ) end sum_lower = vec(sum(lower; dims = 1)) max_lower_bound = maximum(sum_lower) if max_lower_bound > 1 - throw(ArgumentError("The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1.")) + throw( + ArgumentError( + "The joint lower bound transition probability per column (max is $max_lower_bound) should be less than or equal to 1.", + ), + ) end sum_upper = sum_lower .+ vec(sum(gap; dims = 1)) max_upper_bound = minimum(sum_upper) if max_upper_bound < 1 - throw(ArgumentError("The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1.")) + throw( + ArgumentError( + "The joint upper bound transition probability per column (min is $max_upper_bound) should be greater than or equal to 1.", + ), + ) end end @@ -198,8 +251,12 @@ source_shape(p::IntervalAmbiguitySets) = (num_sets(p),) action_shape(::IntervalAmbiguitySets) = (1,) marginals(p::IntervalAmbiguitySets) = (p,) -maxsupportsize(p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix{R}} = size(p.gap, 1) -maxsupportsize(p::IntervalAmbiguitySets{R, 
MR}) where {R, MR <: SparseArrays.AbstractSparseMatrixCSC{R}} = maxdiff(SparseArrays.getcolptr(p.gap)) +maxsupportsize(p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix{R}} = + size(p.gap, 1) +maxsupportsize( + p::IntervalAmbiguitySets{R, MR}, +) where {R, MR <: SparseArrays.AbstractSparseMatrixCSC{R}} = + maxdiff(SparseArrays.getcolptr(p.gap)) function Base.getindex(p::IntervalAmbiguitySets, j::Integer) # Select by columns only! @@ -209,8 +266,13 @@ function Base.getindex(p::IntervalAmbiguitySets, j::Integer) return IntervalAmbiguitySet(l, g) end -sub2ind(::IntervalAmbiguitySets, jₐ::NTuple{M, T}, jₛ::NTuple{N, T}) where {N, M, T <: Integer} = T(jₛ[1]) -sub2ind(p::IntervalAmbiguitySets, jₐ::CartesianIndex, jₛ::CartesianIndex) = sub2ind(p, Tuple(jₐ), Tuple(jₛ)) +sub2ind( + ::IntervalAmbiguitySets, + jₐ::NTuple{M, T}, + jₛ::NTuple{N, T}, +) where {N, M, T <: Integer} = T(jₛ[1]) +sub2ind(p::IntervalAmbiguitySets, jₐ::CartesianIndex, jₛ::CartesianIndex) = + sub2ind(p, Tuple(jₐ), Tuple(jₛ)) Base.getindex(p::IntervalAmbiguitySets, jₐ, jₛ) = p[sub2ind(p, jₐ, jₛ)] Base.iterate(p::IntervalAmbiguitySets) = (p[1], 2) @@ -223,25 +285,45 @@ function Base.iterate(p::IntervalAmbiguitySets, state) end Base.length(p::IntervalAmbiguitySets) = num_sets(p) -function showambiguitysets(io::IO, prefix, ::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix} +function showambiguitysets( + io::IO, + prefix, + ::IntervalAmbiguitySets{R, MR}, +) where {R, MR <: AbstractMatrix} println(io, prefix, styled"└─ Ambiguity set type: Interval (dense, {code:$MR})") end -function showambiguitysets(io::IO, prefix, p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractSparseMatrix} +function showambiguitysets( + io::IO, + prefix, + p::IntervalAmbiguitySets{R, MR}, +) where {R, MR <: AbstractSparseMatrix} println(io, prefix, styled"├─ Ambiguity set type: Interval (sparse, {code:$MR})") num_transitions = nnz(p.gap) max_support = maxsupportsize(p) - println(io, prefix, styled"└─ 
Transitions: {magenta: $num_transitions (max support: $max_support)}") + println( + io, + prefix, + styled"└─ Transitions: {magenta: $num_transitions (max support: $max_support)}", + ) end -function Base.show(io::IO, mime::MIME"text/plain", p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractMatrix} +function Base.show( + io::IO, + mime::MIME"text/plain", + p::IntervalAmbiguitySets{R, MR}, +) where {R, MR <: AbstractMatrix} println(io, styled"{code:IntervalAmbiguitySets}") println(io, styled"├─ Storage type: {code:$MR}") println(io, "├─ Number of target states: ", num_target(p)) println(io, "└─ Number of ambiguity sets: ", num_sets(p)) end -function Base.show(io::IO, mime::MIME"text/plain", p::IntervalAmbiguitySets{R, MR}) where {R, MR <: AbstractSparseMatrix} +function Base.show( + io::IO, + mime::MIME"text/plain", + p::IntervalAmbiguitySets{R, MR}, +) where {R, MR <: AbstractSparseMatrix} println(io, styled"{code:IntervalAmbiguitySets}") println(io, styled"├─ Storage type: {code:$MR}") println(io, "├─ Number of target states: ", num_target(p)) @@ -281,11 +363,17 @@ Return the gap between upper and lower bound transition probabilities of the amb gap(p::IntervalAmbiguitySet) = p.gap gap(p::IntervalAmbiguitySet, destination) = p.gap[destination] -const ColumnView{Tv} = SubArray{Tv, 1, <:AbstractMatrix{Tv}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} +const ColumnView{Tv} = + SubArray{Tv, 1, <:AbstractMatrix{Tv}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} support(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = eachindex(p.gap) supportsize(p::IntervalAmbiguitySet{R, <:ColumnView{R}}) where {R} = length(p.gap) -const SparseColumnView{Tv, Ti} = SubArray{Tv, 1, <:SparseArrays.AbstractSparseMatrixCSC{Tv, Ti}, Tuple{Base.Slice{Base.OneTo{Int}}, Int}} +const SparseColumnView{Tv, Ti} = SubArray{ + Tv, + 1, + <:SparseArrays.AbstractSparseMatrixCSC{Tv, Ti}, + Tuple{Base.Slice{Base.OneTo{Int}}, Int}, +} support(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where 
{R} = rowvals(p.gap) supportsize(p::IntervalAmbiguitySet{R, <:SparseColumnView{R}}) where {R} = nnz(p.gap) @@ -304,7 +392,9 @@ Base.IteratorEltype(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.HasElty Base.eltype(::IntervalAmbiguitySetVertexIterator{R}) where {R} = Vector{R} Base.IteratorSize(::Type{<:IntervalAmbiguitySetVertexIterator}) = Base.SizeUnknown() -function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}) where {R, VR <: AbstractVector{R}} +function Base.iterate( + it::IntervalAmbiguitySetVertexIterator{R, VR}, +) where {R, VR <: AbstractVector{R}} permutation = collect(1:length(support(it.set))) v = it.result @@ -327,7 +417,10 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}) where {R, V return v, (permutation, break_idx) end -function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) where {R, VR <: AbstractVector{R}} +function Base.iterate( + it::IntervalAmbiguitySetVertexIterator{R, VR}, + state, +) where {R, VR <: AbstractVector{R}} (permutation, last_break_idx) = state # Skip permutations that would lead to the same vertex @@ -336,7 +429,7 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher for j in last_break_idx:-1:1 # Find smallest permutation[k] in permutation[j+1:end] where permutation[j] < permutation[k] next_in_suffix = nothing - for k in j+1:length(permutation) + for k in (j + 1):length(permutation) if permutation[k] > permutation[j] if isnothing(next_in_suffix) || permutation[k] < permutation[next_in_suffix] next_in_suffix = k @@ -349,7 +442,8 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher end # Swap - permutation[j], permutation[next_in_suffix] = permutation[next_in_suffix], permutation[j] + permutation[j], permutation[next_in_suffix] = + permutation[next_in_suffix], permutation[j] break_j = j break end @@ -358,7 +452,7 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher return 
nothing end - sort!(@view(permutation[break_j+1:end])) + sort!(@view(permutation[(break_j + 1):end])) # Now compute the vertex for this new permutation v = it.result @@ -386,5 +480,6 @@ function Base.iterate(it::IntervalAmbiguitySetVertexIterator{R, VR}, state) wher end vertex_generator(p::IntervalAmbiguitySet) = IntervalAmbiguitySetVertexIterator(p) -vertex_generator(p::IntervalAmbiguitySet, result::Vector) = IntervalAmbiguitySetVertexIterator(p, result) -vertices(p::IntervalAmbiguitySet) = map(copy, vertex_generator(p)) \ No newline at end of file +vertex_generator(p::IntervalAmbiguitySet, result::Vector) = + IntervalAmbiguitySetVertexIterator(p, result) +vertices(p::IntervalAmbiguitySet) = map(copy, vertex_generator(p)) diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 46ed313b..1813567c 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -33,7 +33,14 @@ function Marginal( checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars) linear_index = LinearIndices((action_vars..., source_dims...)) - return Marginal(ambiguity_sets, state_indices, action_indices, source_dims, action_vars, linear_index) + return Marginal( + ambiguity_sets, + state_indices, + action_indices, + source_dims, + action_vars, + linear_index, + ) end function Marginal( @@ -49,20 +56,44 @@ function Marginal( source_dims_32 = Int32.(source_dims) action_vars_32 = Int32.(action_vars) - return Marginal(ambiguity_sets, state_indices_32, action_indices_32, source_dims_32, action_vars_32) + return Marginal( + ambiguity_sets, + state_indices_32, + action_indices_32, + source_dims_32, + action_vars_32, + ) end -function Marginal(ambiguity_sets::A, source_dims, action_vars) where {A <: AbstractAmbiguitySets} +function Marginal( + ambiguity_sets::A, + source_dims, + action_vars, +) where {A <: AbstractAmbiguitySets} return Marginal(ambiguity_sets, (1,), (1,), source_dims, action_vars) end -function 
checkindices(ambiguity_sets, state_indices, action_indices, source_dims, action_vars) +function checkindices( + ambiguity_sets, + state_indices, + action_indices, + source_dims, + action_vars, +) if length(state_indices) != length(source_dims) - throw(ArgumentError("Length of state indices must match length of source dimensions.")) + throw( + ArgumentError( + "Length of state indices must match length of source dimensions.", + ), + ) end if length(action_indices) != length(action_vars) - throw(ArgumentError("Length of action indices must match length of action dimensions.")) + throw( + ArgumentError( + "Length of action indices must match length of action dimensions.", + ), + ) end if any(state_indices .<= 0) @@ -72,7 +103,7 @@ function checkindices(ambiguity_sets, state_indices, action_indices, source_dims if any(action_indices .<= 0) throw(ArgumentError("Action indices must be positive.")) end - + if any(source_dims .<= 0) throw(ArgumentError("Source dimensions must be positive.")) end @@ -82,7 +113,11 @@ function checkindices(ambiguity_sets, state_indices, action_indices, source_dims end if prod(source_dims) * prod(action_vars) != num_sets(ambiguity_sets) - throw(ArgumentError("The number of ambiguity sets must match the product of source dimensions and action dimensions.")) + throw( + ArgumentError( + "The number of ambiguity sets must match the product of source dimensions and action dimensions.", + ), + ) end end @@ -139,8 +174,13 @@ The selected index is then converted to a linear index for the underlying ambigu """ Base.getindex(p::Marginal, action, source) = ambiguity_sets(p)[sub2ind(p, action, source)] -sub2ind(p::Marginal, action::CartesianIndex, source::CartesianIndex) = sub2ind(p, Tuple(action), Tuple(source)) -function sub2ind(p::Marginal, action::NTuple{M, T}, source::NTuple{N, T}) where {N, M, T <: Integer} +sub2ind(p::Marginal, action::CartesianIndex, source::CartesianIndex) = + sub2ind(p, Tuple(action), Tuple(source)) +function sub2ind( + 
p::Marginal, + action::NTuple{M, T}, + source::NTuple{N, T}, +) where {N, M, T <: Integer} action = getindex.((action,), p.action_indices) source = getindex.((source,), p.state_indices) j = p.linear_index[action..., source...] @@ -149,11 +189,15 @@ function sub2ind(p::Marginal, action::NTuple{M, T}, source::NTuple{N, T}) where end function showmarginal(io::IO, prefix, marginal::Marginal) - println(io, prefix, styled"├─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = $(action_variables(marginal))}") + println( + io, + prefix, + styled"├─ Conditional variables: {magenta:states = $(state_variables(marginal)), actions = $(action_variables(marginal))}", + ) showambiguitysets(io, prefix, ambiguity_sets(marginal)) end function Base.show(io::IO, mime::MIME"text/plain", marginal::Marginal) println(io, styled"{code:Marginal}") showmarginal(io, "", marginal) -end \ No newline at end of file +end diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index fa2b1cc9..7f0e0828 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -42,7 +42,13 @@ isinterval(::IntervalAmbiguitySets) = IsInterval() # Marginals include("Marginal.jl") -export Marginal, ambiguity_sets, state_variables, action_variables, source_shape, action_shape, num_target +export Marginal, + ambiguity_sets, + state_variables, + action_variables, + source_shape, + action_shape, + num_target ispolytopic(marginal::Marginal) = ispolytopic(ambiguity_sets(marginal)) isinterval(marginal::Marginal) = isinterval(ambiguity_sets(marginal)) diff --git a/src/problem.jl b/src/problem.jl index e4743006..7c286529 100644 --- a/src/problem.jl +++ b/src/problem.jl @@ -67,7 +67,11 @@ function Base.show(io::IO, mime::MIME"text/plain", prob::VerificationProblem) if !(prob.strategy isa NoStrategy) showstrategy(io, "└─ ", " ", strategy(prob)) else - println(io, "└─ ", styled"No strategy provided (selecting optimal actions at every step)") 
+ println( + io, + "└─ ", + styled"No strategy provided (selecting optimal actions at every step)", + ) end end @@ -192,4 +196,4 @@ function Base.show(io::IO, mime::MIME"text/plain", prob::ControlSynthesisProblem println(io, styled"{code:ControlSynthesisProblem}") showsystem(io, "├─ ", "│ ", system(prob)) showspecification(io, "└─ ", " ", specification(prob)) -end \ No newline at end of file +end diff --git a/src/specification.jl b/src/specification.jl index 6a0c6d04..cf805fa3 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -129,7 +129,6 @@ function checkproperty(prop::FiniteTimeDFAReachability, system) checkstatebounds(reach(prop), system) end - isfinitetime(prop::FiniteTimeDFAReachability) = true """ @@ -197,7 +196,11 @@ reach(prop::InfiniteTimeDFAReachability) = prop.reach function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeDFAReachability) println(io, first_prefix, styled"{code:InfiniteTimeDFAReachability}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end @@ -317,7 +320,11 @@ reach(prop::InfiniteTimeReachability) = prop.reach function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReachability) println(io, first_prefix, styled"{code:InfiniteTimeReachability}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end @@ -549,7 +556,11 @@ avoid(prop::InfiniteTimeReachAvoid) = prop.avoid function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReachAvoid) println(io, first_prefix, styled"{code:InfiniteTimeReachAvoid}") - println(io, prefix, styled"├─ Convergence threshold: 
{magenta:$(convergence_eps(prop))}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) println(io, prefix, styled"├─ Reach states: {magenta:$(reach(prop))}") println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end @@ -742,7 +753,11 @@ avoid(prop::InfiniteTimeSafety) = prop.avoid function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeSafety) println(io, first_prefix, styled"{code:InfiniteTimeSafety}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end @@ -834,7 +849,11 @@ function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeReward) println(io, first_prefix, styled"{code:FiniteTimeReward}") println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(prop))}") - println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(prop))), $(size(reward(prop)))}") + println( + io, + prefix, + styled"└─ Reward storage: {magenta:$(eltype(reward(prop))), $(size(reward(prop)))}", + ) end """ @@ -895,9 +914,17 @@ convergence_eps(prop::InfiniteTimeReward) = prop.convergence_eps function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeReward) println(io, first_prefix, styled"{code:InfiniteTimeReward}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) println(io, prefix, styled"├─ Discount factor: {magenta:$(discount(prop))}") - println(io, prefix, styled"└─ Reward storage: {magenta:$(eltype(reward(prop))), $(size(reward(prop)))}") + println( + io, + prefix, + styled"└─ Reward storage: {magenta:$(eltype(reward(prop))), 
$(size(reward(prop)))}", + ) end ## Hitting time @@ -970,7 +997,11 @@ convergence_eps(prop::ExpectedExitTime) = prop.convergence_eps function showproperty(io::IO, first_prefix, prefix, prop::ExpectedExitTime) println(io, first_prefix, styled"{code:ExpectedExitTime}") - println(io, prefix, styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") end @@ -1071,4 +1102,4 @@ function showspecification(io::IO, first_prefix, prefix, spec::Specification) println(io, prefix, styled"├─ Satisfaction mode: {magenta:$(satisfaction_mode(spec))}") println(io, prefix, styled"├─ Strategy mode: {magenta:$(strategy_mode(spec))}") showproperty(io, prefix * "└─ Property: ", prefix * " ", system_property(spec)) -end \ No newline at end of file +end diff --git a/src/strategy.jl b/src/strategy.jl index 0df6e091..24bbeacc 100644 --- a/src/strategy.jl +++ b/src/strategy.jl @@ -87,4 +87,4 @@ function showstrategy(io::IO, first_prefix, prefix, strategy::TimeVaryingStrateg println(io, first_prefix, styled"{code:TimeVaryingStrategy}") println(io, prefix, styled"├─ Time length: {magenta:$(length(strategy.strategy))}") println(io, prefix, styled"└─ Strategy shape: {magenta:$(size(strategy.strategy[1]))}") -end \ No newline at end of file +end diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index 3f0f6b92..caef88f6 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -13,12 +13,7 @@ function construct_strategy_cache end # Strategy cache for not storing policies - useful for dispatching struct NoStrategyCache <: OptimizingStrategyCache end -function construct_strategy_cache( - ::Union{ - <:AbstractAmbiguitySets, - <:StochasticProcess, - }, -) +function construct_strategy_cache(::Union{<:AbstractAmbiguitySets, <:StochasticProcess}) return NoStrategyCache() end @@ -39,7 +34,8 @@ 
construct_strategy_cache(problem::VerificationProblem{S, F, C}) where {S, F, C} GivenStrategyCache(strategy(problem)) time_length(cache::GivenStrategyCache) = time_length(cache.strategy) -struct ActiveGivenStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: NonOptimizingStrategyCache +struct ActiveGivenStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: + NonOptimizingStrategyCache strategy::A end Base.getindex(cache::GivenStrategyCache, k) = ActiveGivenStrategyCache(cache.strategy[k]) @@ -53,12 +49,15 @@ construct_strategy_cache(problem::ControlSynthesisProblem) = construct_strategy_ ) # Strategy cache for storing time-varying policies -struct TimeVaryingStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: OptimizingStrategyCache +struct TimeVaryingStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: + OptimizingStrategyCache cur_strategy::A strategy::Vector{A} end -function TimeVaryingStrategyCache(cur_strategy::A) where {N, A <: AbstractArray{NTuple{N, Int32}}} +function TimeVaryingStrategyCache( + cur_strategy::A, +) where {N, A <: AbstractArray{NTuple{N, Int32}}} return TimeVaryingStrategyCache(cur_strategy, Vector{A}()) end @@ -70,7 +69,8 @@ function construct_strategy_cache(problem::ControlSynthesisProblem, time_varying return TimeVaryingStrategyCache(cur_strategy) end -cachetostrategy(strategy_cache::TimeVaryingStrategyCache) = TimeVaryingStrategy(collect(reverse(strategy_cache.strategy))) +cachetostrategy(strategy_cache::TimeVaryingStrategyCache) = + TimeVaryingStrategy(collect(reverse(strategy_cache.strategy))) function extract_strategy!( strategy_cache::TimeVaryingStrategyCache, @@ -90,7 +90,8 @@ function step_postprocess_strategy_cache!(strategy_cache::TimeVaryingStrategyCac end # Strategy cache for storing stationary policies -struct StationaryStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: OptimizingStrategyCache +struct StationaryStrategyCache{N, A <: AbstractArray{NTuple{N, Int32}}} <: + OptimizingStrategyCache 
strategy::A end @@ -105,7 +106,8 @@ function construct_strategy_cache( return StationaryStrategyCache(strategy) end -cachetostrategy(strategy_cache::StationaryStrategyCache) = StationaryStrategy(strategy_cache.strategy) +cachetostrategy(strategy_cache::StationaryStrategyCache) = + StationaryStrategy(strategy_cache.strategy) function extract_strategy!( strategy_cache::StationaryStrategyCache, diff --git a/src/utils.jl b/src/utils.jl index 9c680953..a00de24d 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,14 +1,11 @@ @inline @inbounds maxdiff(x::V) where {V <: AbstractVector} = maximum(x[i + 1] - x[i] for i in 1:(length(x) - 1)) -arrayfactory(mp::ProductProcess, T, sizes) = - arrayfactory(markov_process(mp), T, sizes) -arrayfactory(mp::FactoredRMDP, T, sizes) = - arrayfactory(marginals(mp)[1], T, sizes) +arrayfactory(mp::ProductProcess, T, sizes) = arrayfactory(markov_process(mp), T, sizes) +arrayfactory(mp::FactoredRMDP, T, sizes) = arrayfactory(marginals(mp)[1], T, sizes) arrayfactory(marginal::Marginal, T, sizes) = arrayfactory(ambiguity_sets(marginal), T, sizes) -arrayfactory(prob::IntervalAmbiguitySets, T, sizes) = - arrayfactory(prob.gap, T, sizes) +arrayfactory(prob::IntervalAmbiguitySets, T, sizes) = arrayfactory(prob.gap, T, sizes) arrayfactory(::MR, T, sizes) where {MR <: AbstractArray} = Array{T}(undef, sizes) function valuetype(prob::AbstractIntervalMDPProblem) @@ -36,4 +33,4 @@ valuetype(::NoStrategy) = nothing valuetype(::Property) = nothing valuetype(::FiniteTimeReward{R}) where {R} = R -valuetype(::InfiniteTimeReward{R}) where {R} = R \ No newline at end of file +valuetype(::InfiniteTimeReward{R}) where {R} = R diff --git a/src/workspace.jl b/src/workspace.jl index b5aba092..3642bade 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -16,7 +16,11 @@ struct ProductWorkspace{W, MT <: AbstractArray} intermediate_values::MT end -function construct_workspace(proc::ProductProcess, alg=default_bellman_algorithm(proc); kwargs...) 
+function construct_workspace( + proc::ProductProcess, + alg = default_bellman_algorithm(proc); + kwargs..., +) mp = markov_process(proc) underlying_workspace = construct_workspace(mp, alg; kwargs...) intermediate_values = arrayfactory(mp, valuetype(mp), state_values(mp)) @@ -24,17 +28,24 @@ function construct_workspace(proc::ProductProcess, alg=default_bellman_algorithm return ProductWorkspace(underlying_workspace, intermediate_values) end -construct_workspace(mdp::FactoredRMDP, alg=default_bellman_algorithm(mdp); kwargs...) = construct_workspace(mdp, modeltype(mdp), alg; kwargs...) +construct_workspace(mdp::FactoredRMDP, alg = default_bellman_algorithm(mdp); kwargs...) = + construct_workspace(mdp, modeltype(mdp), alg; kwargs...) function construct_workspace( sys::FactoredRMDP, ::IsIMDP, ::OMaximization; threshold = 10, - kwargs... + kwargs..., ) prob = ambiguity_sets(marginals(sys)[1]) - return construct_workspace(prob, OMaximization(); threshold = threshold, num_actions = num_actions(sys), kwargs...) 
+ return construct_workspace( + prob, + OMaximization(); + threshold = threshold, + num_actions = num_actions(sys), + kwargs..., + ) end # Dense @@ -45,7 +56,10 @@ struct DenseIntervalOMaxWorkspace{T <: Real} actions::Vector{T} end -function DenseIntervalOMaxWorkspace(ambiguity_set::IntervalAmbiguitySets{R}, nactions) where {R <: Real} +function DenseIntervalOMaxWorkspace( + ambiguity_set::IntervalAmbiguitySets{R}, + nactions, +) where {R <: Real} budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1)) scratch = Vector{Int32}(undef, num_target(ambiguity_set)) perm = Vector{Int32}(undef, num_target(ambiguity_set)) @@ -60,14 +74,17 @@ struct ThreadedDenseIntervalOMaxWorkspace{T <: Real} thread_workspaces::Vector{DenseIntervalOMaxWorkspace{T}} end -function ThreadedDenseIntervalOMaxWorkspace(ambiguity_set::IntervalAmbiguitySets{R}, nactions) where {R <: Real} +function ThreadedDenseIntervalOMaxWorkspace( + ambiguity_set::IntervalAmbiguitySets{R}, + nactions, +) where {R <: Real} budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1)) scratch = Vector{Int32}(undef, num_target(ambiguity_set)) perm = Vector{Int32}(undef, num_target(ambiguity_set)) workspaces = [ - DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions)) for - _ in 1:Threads.nthreads() + DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions)) + for _ in 1:Threads.nthreads() ] return ThreadedDenseIntervalOMaxWorkspace(workspaces) end @@ -75,13 +92,16 @@ end Base.getindex(ws::ThreadedDenseIntervalOMaxWorkspace, i) = ws.thread_workspaces[i] ## permutation and scratch space is shared across threads -permutation(ws::ThreadedDenseIntervalOMaxWorkspace) = permutation(first(ws.thread_workspaces)) +permutation(ws::ThreadedDenseIntervalOMaxWorkspace) = + permutation(first(ws.thread_workspaces)) scratch(ws::ThreadedDenseIntervalOMaxWorkspace) = scratch(first(ws.thread_workspaces)) function construct_workspace( prob::IntervalAmbiguitySets{R, MR}, ::OMaximization = 
default_bellman_algorithm(prob); - threshold = 10, num_actions = 1, kwargs... + threshold = 10, + num_actions = 1, + kwargs..., ) where {R, MR <: AbstractMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold return DenseIntervalOMaxWorkspace(prob, num_actions) @@ -98,7 +118,10 @@ struct SparseIntervalOMaxWorkspace{T <: Real} actions::Vector{T} end -function SparseIntervalOMaxWorkspace(ambiguity_sets::IntervalAmbiguitySets{R}, nactions) where {R <: Real} +function SparseIntervalOMaxWorkspace( + ambiguity_sets::IntervalAmbiguitySets{R}, + nactions, +) where {R <: Real} max_support = maxsupportsize(ambiguity_sets) budget = 1 .- vec(sum(ambiguity_sets.lower; dims = 1)) @@ -114,9 +137,13 @@ struct ThreadedSparseIntervalOMaxWorkspace{T <: Real} thread_workspaces::Vector{SparseIntervalOMaxWorkspace{T}} end -function ThreadedSparseIntervalOMaxWorkspace(ambiguity_sets::IntervalAmbiguitySets, nactions) +function ThreadedSparseIntervalOMaxWorkspace( + ambiguity_sets::IntervalAmbiguitySets, + nactions, +) nthreads = Threads.nthreads() - thread_workspaces = [SparseIntervalOMaxWorkspace(ambiguity_sets, nactions) for _ in 1:nthreads] + thread_workspaces = + [SparseIntervalOMaxWorkspace(ambiguity_sets, nactions) for _ in 1:nthreads] return ThreadedSparseIntervalOMaxWorkspace(thread_workspaces) end @@ -127,7 +154,7 @@ function construct_workspace( ::OMaximization = default_bellman_algorithm(prob); threshold = 10, num_actions = 1, - kwargs... 
+ kwargs..., ) where {R, MR <: AbstractSparseMatrix{R}} if Threads.nthreads() == 1 || num_sets(prob) <= threshold return SparseIntervalOMaxWorkspace(prob, num_actions) @@ -137,7 +164,11 @@ function construct_workspace( end # Factored interval McCormick workspace -struct FactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: AbstractArray{T}} +struct FactoredIntervalMcCormickWorkspace{ + M <: JuMP.Model, + T <: Real, + AT <: AbstractArray{T}, +} model::M actions::AT end @@ -152,7 +183,11 @@ function FactoredIntervalMcCormickWorkspace(sys, alg) return FactoredIntervalMcCormickWorkspace(model, actions) end -struct ThreadedFactoredIntervalMcCormickWorkspace{M <: JuMP.Model, T <: Real, AT <: AbstractArray{T}} +struct ThreadedFactoredIntervalMcCormickWorkspace{ + M <: JuMP.Model, + T <: Real, + AT <: AbstractArray{T}, +} thread_workspaces::Vector{FactoredIntervalMcCormickWorkspace{M, T, AT}} end @@ -168,7 +203,7 @@ function construct_workspace( ::Union{IsFIMDP, IsIMDP}, alg::LPMcCormickRelaxation; threshold = 10, - kwargs... 
+ kwargs..., ) if Threads.nthreads() == 1 || num_states(sys) <= threshold return FactoredIntervalMcCormickWorkspace(sys, alg) @@ -190,17 +225,26 @@ function FactoredIntervalOMaxWorkspace(sys::FactoredRMDP) N = length(marginals(sys)) R = valuetype(sys) - max_support_per_marginal = Tuple(maxsupportsize(ambiguity_sets(marginal)) for marginal in marginals(sys)) + max_support_per_marginal = + Tuple(maxsupportsize(ambiguity_sets(marginal)) for marginal in marginals(sys)) max_support = maximum(max_support_per_marginal) - expectation_cache = NTuple{N - 1, Vector{R}}(Vector{R}(undef, n) for n in max_support_per_marginal[2:end]) + expectation_cache = NTuple{N - 1, Vector{R}}( + Vector{R}(undef, n) for n in max_support_per_marginal[2:end] + ) values_gaps = Vector{Tuple{R, R}}(undef, max_support) scratch = Vector{Tuple{R, R}}(undef, max_support) budgets = ntuple(r -> one(R) .- vec(sum(ambiguity_sets(sys[r]).lower; dims = 1)), N) actions = Array{R}(undef, action_shape(sys)) - return FactoredIntervalOMaxWorkspace(expectation_cache, values_gaps, scratch, budgets, actions) + return FactoredIntervalOMaxWorkspace( + expectation_cache, + values_gaps, + scratch, + budgets, + actions, + ) end scratch(ws::FactoredIntervalOMaxWorkspace) = ws.scratch @@ -220,7 +264,7 @@ function construct_workspace( ::IsFIMDP, ::OMaximization; threshold = 10, - kwargs... + kwargs..., ) if Threads.nthreads() == 1 || num_states(sys) <= threshold return FactoredIntervalOMaxWorkspace(sys) @@ -262,11 +306,11 @@ function construct_workspace( ::Union{IsFIMDP, IsIMDP}, ::VertexEnumeration; threshold = 10, - kwargs... 
+ kwargs..., ) if Threads.nthreads() == 1 || num_states(sys) <= threshold return FactoredVertexIteratorWorkspace(sys) else return ThreadedFactoredVertexIteratorWorkspace(sys) end -end \ No newline at end of file +end diff --git a/test/base/bellman.jl b/test/base/bellman.jl index d7a3b668..b09107b0 100644 --- a/test/base/bellman.jl +++ b/test/base/bellman.jl @@ -14,40 +14,19 @@ using IntervalMDP ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[27 // 10, 17 // 10] ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[27 // 10, 17 // 10] end @@ -56,40 +35,19 @@ using IntervalMDP ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] ws = 
IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[17 // 10, 15 // 10] ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[17 // 10, 15 // 10] end end diff --git a/test/base/factored.jl b/test/base/factored.jl index a9e601b8..3eddd150 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -6,7 +6,7 @@ using Random: MersenneTwister @testset "bellman 1d" begin ambiguity_sets = IntervalAmbiguitySets(; lower = N[ - 0 5//10 2//10 + 0 5//10 2//10 1//10 3//10 3//10 2//10 1//10 5//10 ], @@ -39,7 +39,7 @@ using Random: MersenneTwister @test length(verts) <= 6 # = number of permutations of 3 elements expected_verts = N[ # duplicates due to budget < gap for all elements - 6//10 3//10 1//10 + 6 // 10 3//10 1//10 5//10 4//10 1//10 5//10 3//10 2//10 ] @@ -49,9 +49,7 @@ using Random: MersenneTwister verts = IntervalMDP.vertices(ambiguity_sets[3]) @test length(verts) <= 6 # = number of permutations of 3 elements - expected_verts = N[ # Only one vertex since sum(lower) = 1 - 2//10 3//10 5//10 - ] + expected_verts = N[2 // 10 3//10 5//10] @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) end @@ -62,79 +60,41 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - 
ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + imc, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + 
IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected end @@ -144,79 +104,41 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + imc, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + 
IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected end end @@ -227,35 +149,51 @@ using Random: MersenneTwister state_vars = (2, 3) action_vars = (1,) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 7//30 1//15 13//30 4//15 1//6 - 2//5 7//30 1//30 11//30 2//15 1//10 - ], - upper = N[ - 17//30 7//10 2//3 4//5 7//10 2//3 - 9//10 13//15 9//10 5//6 4//5 14//15 - ] - ), state_indices, action_indices, state_vars, action_vars) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//30 1//3 1//6 1//15 2//5 2//15 - 4//15 1//4 1//6 1//30 2//15 1//30 - 2//15 7//30 1//10 7//30 7//15 1//5 - ], - upper = N[ - 2//3 7//15 4//5 11//30 19//30 1//2 - 23//30 4//5 23//30 3//5 7//10 8//15 - 7//15 4//5 23//30 7//10 7//15 23//30 - ] - ), state_indices, action_indices, state_vars, action_vars) + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + mdp = 
FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2), + ) V = N[ 3 13 18 - 12 16 8 + 12 16 8 ] #### Maximization @@ -263,20 +201,13 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) @test V_vertex ≈ N[ 1076//75 4279//300 167//15 11107//900 4123//300 121//9 ] - + ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) @@ -294,30 +225,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -340,27 +261,13 @@ using Random: 
MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -369,14 +276,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) @test V_vertex ≈ N[ 4399//450 41//5 488//45 @@ -400,30 +300,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -446,27 +336,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end @@ -475,35 +351,51 @@ using Random: MersenneTwister state_vars = (2, 3) action_vars = (1, 2) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 7//30 1//15 13//30 4//15 1//6 - 2//5 7//30 1//30 11//30 2//15 1//10 - ], - upper = N[ - 17//30 7//10 2//3 4//5 7//10 2//3 - 9//10 13//15 9//10 5//6 4//5 14//15 - ] - ), (1, 2), (1,), (2, 3), (1,)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 7//30 1//15 13//30 4//15 1//6 + 2//5 7//30 1//30 11//30 2//15 1//10 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ], + ), + (1, 2), + (1,), + (2, 3), + (1,), + ) - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//30 1//3 1//6 1//15 2//5 2//15 - 4//15 1//4 1//6 1//30 2//15 1//30 - 2//15 7//30 1//10 7//30 7//15 1//5 - ], - upper = N[ - 2//3 7//15 4//5 11//30 19//30 1//2 - 23//30 4//5 23//30 3//5 7//10 
8//15 - 7//15 4//5 23//30 7//10 7//15 23//30 - ] - ), (2,), (2,), (3,), (2,)) + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//30 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ], + ), + (2,), + (2,), + (3,), + (2,), + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2), + ) V = N[ 3 13 18 - 12 16 8 + 12 16 8 ] #### Maximization @@ -539,7 +431,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -553,7 +446,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -646,7 +542,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -660,7 +557,10 @@ using Random: 
MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -754,7 +654,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -768,7 +669,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -861,7 +765,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -875,7 +780,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = 
similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -943,46 +851,68 @@ using Random: MersenneTwister state_vars = (3, 3, 3) action_vars = (1,) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 - ] - ), state_indices, action_indices, state_vars, action_vars) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 
11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 - ], - upper = N[ - 3//5 17//30 1//2 
3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 - ] - ), state_indices, action_indices, state_vars, action_vars) + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 
1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) V = N[ 23, @@ -1020,14 +950,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1046,30 +969,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, 
strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -1092,27 +1005,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -1121,14 +1020,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1147,30 +1039,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= 
maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -1193,32 +1075,22 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end - @testset for alg in [RobustValueIteration(LPMcCormickRelaxation()), RobustValueIteration(OMaximization()), 
RobustValueIteration(VertexEnumeration())] + @testset for alg in [ + RobustValueIteration(LPMcCormickRelaxation()), + RobustValueIteration(OMaximization()), + RobustValueIteration(VertexEnumeration()), + ] @testset "implicit sink state" begin @testset "first dimension" begin state_indices = (1, 2, 3) @@ -1228,88 +1100,133 @@ using Random: MersenneTwister action_vars = (1,) # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 - 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 - 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 - ], - upper = N[ - 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 - 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 - 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 - 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 - 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 - ], - upper = N[ - 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 - 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 - 17//30 3//5 0 8//15 1//2 0 7//15 
1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 - 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 - 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 - ], - upper = N[ - 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 - 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 - 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 + ], + upper = N[ + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 + ], + ), + state_indices, + 
action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 + ], + upper = N[ + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ], + upper = N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) # Implicit - marginal1 = 
Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 
13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ], + ), + state_indices, + action_indices, 
+ source_dims, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + implicit_mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + (marginal1, marginal2, marginal3), + ) prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) @@ -1332,88 +1249,133 @@ using Random: MersenneTwister action_vars = (1,) # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 - ] - ), 
state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 
3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 
2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 
8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ], + ), + state_indices, + action_indices, + source_dims, + 
action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + implicit_mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + (marginal1, marginal2, marginal3), + ) prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) @@ -1436,88 +1398,133 @@ using Random: MersenneTwister action_vars = (1,) # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ 
- 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 
0 0 0 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 - ] - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 
1//5 4//15 0 0 0 0 0 0 1 1 1 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ], - upper = N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 
17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ] - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ], - upper = N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ] - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + marginal1 
= Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + upper = N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + upper = N[ + 3//5 17//30 1//2 3//5 19//30 
2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ], + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + implicit_mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + (marginal1, marginal2, marginal3), + ) prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) @@ -1541,15 +1548,13 @@ using Random: MersenneTwister prob_upper = [(rand(rng, N, 3, 81) .+ N(1)) ./ N(3) for _ in 1:4] ambiguity_sets = ntuple( - i -> IntervalAmbiguitySets(; - lower = prob_lower[i], - upper = prob_upper[i], - ), + i -> IntervalAmbiguitySets(; lower = prob_lower[i], upper = prob_upper[i]), 4, ) marginals = ntuple( - i -> Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 3, 3), (1,)), + i -> + Marginal(ambiguity_sets[i], (1, 2, 3, 4), (1,), (3, 3, 3, 3), (1,)), 4, ) @@ -1574,15 +1579,14 @@ using Random: MersenneTwister act_idx = CartesianIndex(1) for I in CartesianIndices((3, 3, 3, 3)) for J in CartesianIndices((3, 3, 3, 3)) - marginal_ambiguity_sets = map(marginal -> marginal[act_idx, I], marginals) + marginal_ambiguity_sets = + map(marginal -> marginal[act_idx, I], marginals) - prob_lower_simple[lin[J], lin[I]] = prod( - lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) + prob_lower_simple[lin[J], lin[I]] = + prod(lower(marginal_ambiguity_sets[i], J[i]) for i in 1:4) - prob_upper_simple[lin[J], lin[I]] = prod( - upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4 - ) + prob_upper_simple[lin[J], lin[I]] = + prod(upper(marginal_ambiguity_sets[i], J[i]) for i in 1:4) end end @@ -1626,15 +1630,18 @@ using Random: MersenneTwister ] ambiguity_sets = ntuple( - i -> IntervalAmbiguitySets(; - lower = prob_lower[i], - upper = 
prob_upper[i], - ), + i -> IntervalAmbiguitySets(; lower = prob_lower[i], upper = prob_upper[i]), num_axis, ) marginals = ntuple( - i -> Marginal(ambiguity_sets[i], state_indices, action_indices, state_vars, action_vars), + i -> Marginal( + ambiguity_sets[i], + state_indices, + action_indices, + state_vars, + action_vars, + ), num_axis, ) diff --git a/test/base/imdp.jl b/test/base/imdp.jl index f4440008..b3055b31 100644 --- a/test/base/imdp.jl +++ b/test/base/imdp.jl @@ -4,13 +4,13 @@ using IntervalMDP @testset for N in [Float32, Float64, Rational{BigInt}] prob1 = IntervalAmbiguitySets(; lower = N[ - 0 1//2 + 0 1//2 1//10 3//10 - 1//5 1//10 + 1//5 1//10 ], upper = N[ - 1//2 7//10 - 3//5 1//2 + 1//2 7//10 + 3//5 1//2 7//10 3//10 ], ) @@ -18,7 +18,7 @@ using IntervalMDP prob2 = IntervalAmbiguitySets(; lower = N[ 1//10 1//5 - 1//5 3//10 + 1//5 3//10 3//10 2//5 ], upper = N[ @@ -28,18 +28,15 @@ using IntervalMDP ], ) - prob3 = IntervalAmbiguitySets(; - lower = N[ - 0 0 - 0 0 - 1 1 - ], - upper = N[ - 0 0 - 0 0 - 1 1 - ] - ) + prob3 = IntervalAmbiguitySets(; lower = N[ + 0 0 + 0 0 + 1 1 + ], upper = N[ + 0 0 + 0 0 + 1 1 + ]) transition_probs = [prob1, prob2, prob3] istates = [1] @@ -53,18 +50,34 @@ using IntervalMDP V = N[1, 2, 3] Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] Vres = similar(Vres) IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] Vres = IntervalMDP.bellman(V, mdp; upper_bound = true, maximize = false) - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, 
(1//5) * 1 + (2//5) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3, + 1 * 3, + ] Vres = similar(Vres) IntervalMDP.bellman!(Vres, V, mdp; upper_bound = true, maximize = false) - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (1//5) * 1 + (2//5) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3, + 1 * 3, + ] end @testset "explicit sink state" begin @@ -640,4 +653,4 @@ using IntervalMDP @test res ≈ res_implicit end end -end \ No newline at end of file +end diff --git a/test/base/synthesis.jl b/test/base/synthesis.jl index c105ea7f..52f9526a 100644 --- a/test/base/synthesis.jl +++ b/test/base/synthesis.jl @@ -37,7 +37,7 @@ prob3 = IntervalAmbiguitySets(; 0.0 0.0 0.0 0.0 1.0 1.0 - ] + ], ) transition_probs = [prob1, prob2, prob3] diff --git a/test/base/vi.jl b/test/base/vi.jl index 701cfad1..1a725acd 100644 --- a/test/base/vi.jl +++ b/test/base/vi.jl @@ -124,4 +124,4 @@ using IntervalMDP problem = VerificationProblem(mc, spec) V_conv, _, u = solve(problem) @test maximum(u) <= N(1//1_000_000) -end \ No newline at end of file +end diff --git a/test/cuda/dense/bellman.jl b/test/cuda/dense/bellman.jl index 91663a05..d5ff36c5 100644 --- a/test/cuda/dense/bellman.jl +++ b/test/cuda/dense/bellman.jl @@ -23,14 +23,7 @@ using IntervalMDP, CUDA ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] end @@ -40,15 +33,8 @@ using IntervalMDP, CUDA ws = IntervalMDP.construct_workspace(prob) strategy_cache = 
IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] end -end \ No newline at end of file +end diff --git a/test/cuda/dense/imdp.jl b/test/cuda/dense/imdp.jl index fecf528c..d70f7483 100644 --- a/test/cuda/dense/imdp.jl +++ b/test/cuda/dense/imdp.jl @@ -4,13 +4,13 @@ using IntervalMDP, CUDA @testset for N in [Float32, Float64] prob1 = IntervalAmbiguitySets(; lower = N[ - 0 1//2 + 0 1//2 1//10 3//10 - 1//5 1//10 + 1//5 1//10 ], upper = N[ - 1//2 7//10 - 3//5 1//2 + 1//2 7//10 + 3//5 1//2 7//10 3//10 ], ) @@ -18,7 +18,7 @@ using IntervalMDP, CUDA prob2 = IntervalAmbiguitySets(; lower = N[ 1//10 1//5 - 1//5 3//10 + 1//5 3//10 3//10 2//5 ], upper = N[ @@ -28,18 +28,15 @@ using IntervalMDP, CUDA ], ) - prob3 = IntervalAmbiguitySets(; - lower = N[ - 0 0 - 0 0 - 1 1 - ], - upper = N[ - 0 0 - 0 0 - 1 1 - ] - ) + prob3 = IntervalAmbiguitySets(; lower = N[ + 0 0 + 0 0 + 1 1 + ], upper = N[ + 0 0 + 0 0 + 1 1 + ]) transition_probs = [prob1, prob2, prob3] istates = [1] @@ -53,12 +50,20 @@ using IntervalMDP, CUDA V = IntervalMDP.cu(N[1, 2, 3]) Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] Vres = IntervalMDP.cu(similar(Vres)) IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 
1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] end @testset "explicit sink state" begin @@ -661,4 +666,4 @@ using IntervalMDP, CUDA @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 end end -end \ No newline at end of file +end diff --git a/test/cuda/dense/synthesis.jl b/test/cuda/dense/synthesis.jl index 7f2d79c2..13f98ef6 100644 --- a/test/cuda/dense/synthesis.jl +++ b/test/cuda/dense/synthesis.jl @@ -1,7 +1,6 @@ using Revise, Test using IntervalMDP, CUDA - prob1 = IntervalAmbiguitySets(; lower = [ 0.0 0.5 @@ -38,7 +37,7 @@ prob3 = IntervalAmbiguitySets(; 0.0 0.0 0.0 0.0 1.0 1.0 - ] + ], ) transition_probs = [prob1, prob2, prob3] diff --git a/test/cuda/dense/vi.jl b/test/cuda/dense/vi.jl index 5a23a7b5..5276c9d5 100644 --- a/test/cuda/dense/vi.jl +++ b/test/cuda/dense/vi.jl @@ -191,4 +191,4 @@ using IntervalMDP, CUDA V_conv, _, u = solve(problem) V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing @test maximum(u) <= N(1//1_000_000) -end \ No newline at end of file +end diff --git a/test/cuda/sparse/imdp.jl b/test/cuda/sparse/imdp.jl index 8d4f9d4a..a68301bb 100644 --- a/test/cuda/sparse/imdp.jl +++ b/test/cuda/sparse/imdp.jl @@ -4,13 +4,13 @@ using IntervalMDP, CUDA, SparseArrays @testset for N in [Float32, Float64] prob1 = IntervalAmbiguitySets(; lower = sparse(N[ - 0 1//2 + 0 1//2 1//10 3//10 - 1//5 1//10 + 1//5 1//10 ]), upper = sparse(N[ - 1//2 7//10 - 3//5 1//2 + 1//2 7//10 + 3//5 1//2 7//10 3//10 ]), ) @@ -18,7 +18,7 @@ using IntervalMDP, CUDA, SparseArrays prob2 = IntervalAmbiguitySets(; lower = sparse(N[ 1//10 1//5 - 1//5 3//10 + 1//5 3//10 3//10 2//5 ]), upper = sparse(N[ @@ -38,7 +38,7 @@ using IntervalMDP, CUDA, SparseArrays 0 0 0 0 1 1 - ]) + ]), ) transition_probs = [prob1, prob2, prob3] @@ -53,12 +53,20 @@ using IntervalMDP, CUDA, SparseArrays V = IntervalMDP.cu(N[1, 2, 3]) Vres = IntervalMDP.bellman(V, 
mdp; upper_bound = false, maximize = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] Vres = IntervalMDP.cu(similar(Vres)) IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] end @testset "explicit sink state" begin @@ -661,4 +669,4 @@ using IntervalMDP, CUDA, SparseArrays @test IntervalMDP.cpu(res) ≈ IntervalMDP.cpu(res_implicit) atol=1e-5 end end -end \ No newline at end of file +end diff --git a/test/cuda/sparse/synthesis.jl b/test/cuda/sparse/synthesis.jl index 29ff45c5..1f4f58e0 100644 --- a/test/cuda/sparse/synthesis.jl +++ b/test/cuda/sparse/synthesis.jl @@ -1,7 +1,6 @@ using Revise, Test using IntervalMDP, CUDA, SparseArrays - prob1 = IntervalAmbiguitySets(; lower = sparse([ 0.0 0.5 @@ -38,7 +37,7 @@ prob3 = IntervalAmbiguitySets(; 0.0 0.0 0.0 0.0 1.0 1.0 - ]) + ]), ) transition_probs = [prob1, prob2, prob3] diff --git a/test/cuda/sparse/vi.jl b/test/cuda/sparse/vi.jl index 1e663e1e..130c38c1 100644 --- a/test/cuda/sparse/vi.jl +++ b/test/cuda/sparse/vi.jl @@ -191,4 +191,4 @@ using IntervalMDP, CUDA, SparseArrays V_conv, _, u = solve(problem) V_conv = IntervalMDP.cpu(V_conv) # Convert to CPU for testing @test maximum(u) <= N(1//1_000_000) -end \ No newline at end of file +end diff --git a/test/data/bmdp_tool.jl b/test/data/bmdp_tool.jl index 0a0ac3a3..87c21cc7 100644 --- a/test/data/bmdp_tool.jl +++ b/test/data/bmdp_tool.jl @@ -67,4 +67,4 @@ end @test as.gap ≈ new_as.gap @test tstates == new_tstates -end \ No 
newline at end of file +end diff --git a/test/sparse/bellman.jl b/test/sparse/bellman.jl index ef93e885..1aaa9276 100644 --- a/test/sparse/bellman.jl +++ b/test/sparse/bellman.jl @@ -1,7 +1,6 @@ using Revise, Test using IntervalMDP, SparseArrays - @testset for N in [Float32, Float64, Rational{BigInt}] prob = IntervalAmbiguitySets(; lower = sparse_hcat( @@ -21,40 +20,19 @@ using IntervalMDP, SparseArrays ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = true, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] end @@ -63,40 +41,19 @@ using IntervalMDP, SparseArrays ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = 
false) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( - ws, - strategy_cache, - Vres, - V, - prob; - upper_bound = false, - ) + IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] end end diff --git a/test/sparse/factored.jl b/test/sparse/factored.jl index b88bd85e..5fbc2c17 100644 --- a/test/sparse/factored.jl +++ b/test/sparse/factored.jl @@ -6,12 +6,12 @@ using Random: MersenneTwister @testset "bellman 1d" begin ambiguity_sets = IntervalAmbiguitySets(; lower = sparse(N[ - 0 5//10 2//10 - 1//10 0 3//10 + 0 5//10 2//10 + 1//10 0 3//10 2//10 1//10 5//10 ]), upper = sparse(N[ - 0 7//10 3//10 + 0 7//10 3//10 6//10 5//10 4//10 7//10 3//10 5//10 ]), @@ -35,8 +35,8 @@ using Random: MersenneTwister @test length(verts) <= 6 # = number of permutations of 3 elements expected_verts = N[ # duplicates due to budget < gap for all elements - 7//10 2//10 1//10 - 7//10 0 3//10 + 7 // 10 2//10 1//10 + 7//10 0 3//10 5//10 4//10 1//10 7//10 0//10 3//10 5//10 2//10 3//10 @@ -47,9 +47,7 @@ using Random: MersenneTwister verts = IntervalMDP.vertices(ambiguity_sets[3]) @test length(verts) <= 6 # = number of permutations of 3 elements - expected_verts = N[ # Only one vertex since sum(lower) = 1 - 2//10 3//10 5//10 - ] + 
expected_verts = N[2 // 10 3//10 5//10] @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) end @@ -60,79 +58,41 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + imc, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - 
strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected end @@ -142,79 +102,41 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + imc, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, 
- Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - imc; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected end end @@ -225,35 +147,59 @@ using Random: MersenneTwister state_vars = (2, 3) action_vars = (1,) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 0 1//15 13//30 4//15 0 - 2//5 7//30 0 11//30 2//15 1//10 - ]), - upper = sparse(N[ - 17//30 7//10 2//3 4//5 7//10 2//3 - 9//10 13//15 9//10 5//6 4//5 14//15 - ]) - ), state_indices, action_indices, state_vars, action_vars) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 0 1//15 13//30 4//15 0 + 2//5 7//30 0 11//30 2//15 1//10 + ], + ), + upper = sparse( + N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//30 1//3 1//6 1//15 0 2//15 - 0 1//4 1//6 1//30 2//15 1//30 - 2//15 7//30 1//10 7//30 7//15 1//5 - ]), - upper = sparse(N[ - 2//3 7//15 4//5 11//30 19//30 1//2 - 23//30 4//5 23//30 3//5 7//10 8//15 - 7//15 4//5 23//30 7//10 7//15 23//30 - ]) - ), state_indices, action_indices, state_vars, action_vars) + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//30 
1//3 1//6 1//15 0 2//15 + 0 1//4 1//6 1//30 2//15 1//30 + 2//15 7//30 1//10 7//30 7//15 1//5 + ], + ), + upper = sparse( + N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2), + ) V = N[ 3 13 18 - 12 16 8 + 12 16 8 ] #### Maximization @@ -261,14 +207,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) @test V_vertex ≈ N[ 1076//75 4279//300 1081//75 @@ -292,30 +231,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = 
true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -338,27 +267,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -367,17 +282,10 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) @test V_vertex ≈ N[ - 412//45 41//5 488//45 + 412//45 41//5 488//45 1033//100 543//50 4253//450 ] @@ -398,30 +306,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = 
false) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -444,27 +342,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end @@ -473,35 +357,51 @@ using Random: MersenneTwister state_vars = (2, 3) action_vars = (1, 2) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 0 7//30 0 13//30 4//15 1//6 - 2//5 7//30 0 11//30 2//15 0 - ], - upper = N[ - 17//30 7//10 2//3 4//5 7//10 2//3 - 9//10 13//15 9//10 5//6 4//5 14//15 - ] - ), (1, 2), (1,), (2, 3), (1,)) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 0 1//3 1//6 1//15 2//5 2//15 - 4//15 1//4 1//6 0 2//15 0 - 2//15 7//30 0 7//30 7//15 1//5 - ], - upper = N[ - 2//3 7//15 4//5 11//30 19//30 1//2 - 23//30 4//5 23//30 3//5 7//10 8//15 - 7//15 4//5 23//30 7//10 7//15 23//30 - ] - ), (2,), 
(2,), (3,), (2,)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 0 7//30 0 13//30 4//15 1//6 + 2//5 7//30 0 11//30 2//15 0 + ], + upper = N[ + 17//30 7//10 2//3 4//5 7//10 2//3 + 9//10 13//15 9//10 5//6 4//5 14//15 + ], + ), + (1, 2), + (1,), + (2, 3), + (1,), + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 0 1//3 1//6 1//15 2//5 2//15 + 4//15 1//4 1//6 0 2//15 0 + 2//15 7//30 0 7//30 7//15 1//5 + ], + upper = N[ + 2//3 7//15 4//5 11//30 19//30 1//2 + 23//30 4//5 23//30 3//5 7//10 8//15 + 7//15 4//5 23//30 7//10 7//15 23//30 + ], + ), + (2,), + (2,), + (3,), + (2,), + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2)) + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2), + ) V = N[ 3 13 18 - 12 16 8 + 12 16 8 ] #### Maximization @@ -537,7 +437,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -551,7 +452,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -644,7 +548,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + 
IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -658,7 +563,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -752,7 +660,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -766,7 +675,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -859,7 +771,8 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -873,7 +786,10 @@ using Random: MersenneTwister ) @test Vres ≈ Vres_first_McCormick - ws = 
IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) IntervalMDP.bellman!( @@ -941,46 +857,80 @@ using Random: MersenneTwister state_vars = (3, 3, 3) action_vars = (1,) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 - ]) - ), state_indices, action_indices, state_vars, action_vars) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 
1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 - 2//15 4//15 1//10 1//30 
7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 - ]) - ), state_indices, action_indices, state_vars, action_vars) + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ], + ), + upper = sparse( + N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - mdp = 
FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) V = N[ 23, @@ -1018,14 +968,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1044,30 +987,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon 
.>= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -1090,27 +1023,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -1119,14 +1038,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - 
IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1145,30 +1057,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -1191,27 +1093,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end @@ -1222,46 +1110,76 @@ using Random: MersenneTwister state_vars = (3, 3, 3) action_vars = (1,) - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 
7//30 1//15 - ], - upper = N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 - ] - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 - ]) - ), state_indices, action_indices, state_vars, action_vars) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1//10 1//30 1//10 1//15 1//10 1//15 4//15 4//15 1//3 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 
3//10 1//5 1//5 1//10 1//30 4//15 1//10 1//5 1//6 7//30 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 3//10 1//10 1//15 1//30 2//15 1//6 1//5 1//10 4//15 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 17//30 13//30 2//5 2//5 2//3 2//5 17//30 2//5 19//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 8//15 7//15 7//15 13//30 8//15 2//5 8//15 17//30 3//5 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 2//5 2//5 11//30 17//30 17//30 1//2 2//5 19//30 13//30 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1//6 7//30 1//15 2//15 1//10 1//3 7//30 1//30 7//30 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 3//10 0 3//10 1//6 3//10 1//5 0 7//30 2//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 1//15 1//3 2//15 1//15 1//5 1//5 1//15 7//30 1//15 + ], + upper = N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 13//30 17//30 17//30 13//30 11//30 19//30 8//15 2//5 8//15 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 1//2 8//15 8//15 8//15 8//15 2//5 3//5 2//3 13//30 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 1//3 2//3 17//30 8//15 17//30 3//5 2//5 19//30 11//30 + ], + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, 
marginal2, marginal3)) + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 7//30 4//15 1//10 1//3 1//5 7//30 1//30 1//5 7//30 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 1//5 4//15 1//10 1//10 1//3 7//30 3//10 1//3 3//10 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 0 1//30 1//15 2//15 1//6 7//30 4//15 4//15 7//30 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 2//3 17//30 3//5 7//15 19//30 1//2 3//5 1//3 19//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 2//3 17//30 8//15 13//30 13//30 3//5 1//2 8//15 8//15 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 0 2//5 11//30 19//30 19//30 2//5 1//2 7//15 7//15 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) V = N[ 23, @@ -1299,14 +1217,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( - ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1325,30 +1236,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .+ epsilon .>= V_vertex) - ws = 
IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -1371,27 +1272,13 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = true, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -1400,14 +1287,7 @@ using Random: MersenneTwister ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( - 
ws, - strategy_cache, - V_vertex, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) @@ -1426,30 +1306,20 @@ using Random: MersenneTwister @test all(Vres_first_McCormick .<= maximum(V)) @test all(Vres_first_McCormick .- epsilon .<= V_vertex) - ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = + IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick - ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) + ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( + mdp, + LPMcCormickRelaxation(), + ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) @@ -1472,32 +1342,22 @@ using Random: MersenneTwister ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = 
similar(Vres_first_OMax) - IntervalMDP.bellman!( - ws, - strategy_cache, - Vres, - V, - mdp; - upper_bound = false, - ) + IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end - @testset for alg in [RobustValueIteration(LPMcCormickRelaxation()), RobustValueIteration(OMaximization()), RobustValueIteration(VertexEnumeration())] + @testset for alg in [ + RobustValueIteration(LPMcCormickRelaxation()), + RobustValueIteration(OMaximization()), + RobustValueIteration(VertexEnumeration()), + ] @testset "implicit sink state" begin @testset "first dimension" begin state_indices = (1, 2, 3) @@ -1507,88 +1367,157 @@ using Random: MersenneTwister action_vars = (1,) # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 - 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 - 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 - ]), - upper = sparse(N[ - 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 - 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 - 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 - 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 - 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 
1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 - ]), - upper = sparse(N[ - 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 - 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 - 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 - 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 - 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 - ]), - upper = sparse(N[ - 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 - 3//5 2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 - 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 0 1//15 3//10 0 1//30 1//3 0 7//30 4//15 0 1//6 1//5 0 1//10 1//5 0 0 7//30 0 7//30 1//5 0 2//15 1//6 0 + 1//5 4//15 0 1//10 1//5 0 3//10 3//10 0 1//10 1//15 0 3//10 3//10 0 7//30 1//5 0 1//10 1//5 0 1//5 1//30 0 1//5 3//10 0 + 4//15 1//30 1 1//5 1//5 1 7//30 4//15 1 2//15 7//30 1 1//5 1//3 1 2//15 1//6 1 1//6 1//3 1 4//15 3//10 1 1//30 3//10 1 + ], + ), + upper = sparse( + 
N[ + 7//15 17//30 0 13//30 3//5 0 17//30 17//30 0 17//30 13//30 0 3//5 2//3 0 11//30 7//15 0 0 1//2 0 17//30 13//30 0 7//15 13//30 0 + 8//15 1//2 0 3//5 7//15 0 8//15 17//30 0 2//3 17//30 0 11//30 7//15 0 19//30 19//30 0 13//15 1//2 0 17//30 13//30 0 3//5 11//30 0 + 11//30 1//3 1 2//5 8//15 1 7//15 3//5 1 2//3 17//30 1 2//3 8//15 1 2//15 3//5 1 2//3 3//5 1 17//30 2//3 1 7//15 8//15 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 1 3//10 0 0 1//6 1//15 0 1//15 1//6 1 1//6 1//30 0 1//10 1//10 0 1//3 2//15 1 3//10 4//15 0 2//15 2//15 0 + 3//10 1//5 0 3//10 2//15 1 0 1//30 0 0 1//15 0 1//30 7//30 1 1//30 1//15 0 7//30 1//15 0 1//6 1//30 1 1//10 1//15 0 + 3//10 4//15 0 1//10 3//10 0 2//15 1//3 1 3//10 1//10 0 1//6 3//10 0 7//30 1//6 1 1//15 1//15 0 1//10 1//5 0 1//5 4//15 1 + ], + ), + upper = sparse( + N[ + 2//5 17//30 1 3//5 11//30 0 3//5 7//15 0 19//30 2//5 1 3//5 2//3 0 2//3 8//15 0 8//15 19//30 1 8//15 8//15 0 13//30 13//30 0 + 1//3 13//30 0 11//30 2//5 1 2//3 2//3 0 0 13//30 0 1//2 17//30 1 17//30 1//3 0 2//5 1//3 0 13//30 11//30 1 8//15 1//3 0 + 17//30 3//5 0 8//15 1//2 0 7//15 1//2 1 2//3 17//30 0 11//30 2//5 0 1//2 7//15 1 2//5 17//30 0 11//30 2//5 0 11//30 2//3 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 1 3//10 3//10 1 4//15 7//30 1 1//5 4//15 0 7//30 1//6 0 1//5 0 0 1//15 1//30 0 3//10 1//3 0 2//15 1//15 0 + 2//15 4//15 0 1//10 1//30 0 7//30 2//15 0 1//15 1//30 1 3//10 1//3 1 1//5 1//10 1 2//15 1//30 0 2//15 4//15 0 0 4//15 0 + 1//5 1//3 0 3//10 1//10 0 1//15 1//10 0 1//30 1//5 0 2//15 7//30 0 1//3 2//15 0 1//10 1//6 1 3//10 1//5 1 7//30 1//30 1 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1 1//2 3//5 1 19//30 2//5 1 8//15 1//3 0 11//30 2//5 0 17//30 13//30 0 2//5 3//5 0 3//5 11//30 0 1//2 11//30 0 + 3//5 
2//3 0 13//30 19//30 0 1//3 2//5 0 17//30 7//15 1 11//30 3//5 1 19//30 7//15 1 2//5 8//15 0 17//30 11//30 0 19//30 13//30 0 + 3//5 2//3 0 1//2 1//2 0 2//3 7//15 0 3//5 3//5 0 1//2 1//3 0 2//5 8//15 0 2//5 11//30 1 1//3 8//15 1 7//15 13//30 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ]) - ), state_indices, 
action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 
1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + ), + upper = sparse( + N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + implicit_mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + (marginal1, marginal2, marginal3), + ) prop = FiniteTimeSafety([(3, i, j) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) @@ -1611,88 +1540,157 @@ using Random: MersenneTwister action_vars = (1,) # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 
1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 
1 1 1 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 1 0 0 7//30 4//15 1//6 1//5 1//10 1//5 1 0 0 0 7//30 7//30 1//5 2//15 1//6 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 0 1 0 1//10 1//15 3//10 3//10 7//30 1//5 0 1 0 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 0 0 1 2//15 7//30 1//5 1//3 2//15 1//6 0 0 1 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 1 0 0 17//30 13//30 3//5 2//3 11//30 7//15 1 0 0 0 1//2 17//30 13//30 7//15 13//30 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 0 1 0 2//3 17//30 11//30 7//15 19//30 19//30 0 1 0 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 0 0 1 2//3 17//30 2//3 8//15 2//15 3//5 0 0 1 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 3//10 0 1//6 1//15 0 0 0 1//15 1//6 1//6 1//30 1//10 1//10 0 0 0 1//3 2//15 3//10 4//15 2//15 2//15 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 0 0 0 1//15 1//30 7//30 1//30 1//15 0 0 0 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 1 1 1 3//10 1//10 1//6 3//10 7//30 1//6 1 1 1 1//15 1//15 1//10 1//5 1//5 4//15 1 1 1 + ], + ), + upper = sparse( + N[ + 2//5 17//30 3//5 
11//30 3//5 7//15 0 0 0 19//30 2//5 3//5 2//3 2//3 8//15 0 0 0 8//15 19//30 8//15 8//15 13//30 13//30 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 0 0 0 13//30 1//2 17//30 17//30 1//3 0 0 0 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 1 1 1 2//3 17//30 11//30 2//5 1//2 7//15 1 1 1 2//5 17//30 11//30 2//5 11//30 2//3 1 1 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1 1 1 1//5 4//15 7//30 1//6 1//5 0 0 0 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 0 0 0 1//15 1//30 3//10 1//3 1//5 1//10 1 1 1 2//15 1//30 2//15 4//15 0 4//15 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 0 0 0 1//30 1//5 2//15 7//30 1//3 2//15 0 0 0 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 1 1 1 8//15 1//3 11//30 2//5 17//30 13//30 0 0 0 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 0 0 0 17//30 7//15 11//30 3//5 19//30 7//15 1 1 1 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 0 0 0 3//5 3//5 1//2 1//3 2//5 8//15 0 0 0 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 
13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) 
+ marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + ), + upper = sparse( + N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 
1//10 1//6 3//10 1//5 7//30 1//30 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + implicit_mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + (marginal1, marginal2, marginal3), + ) prop = FiniteTimeSafety([(i, 3, j) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) @@ -1715,88 +1713,157 @@ using Random: MersenneTwister action_vars = (1,) # Explicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 
1//10 1//15 0 0 0 1 1 1 0 0 0 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 - ]) - ), state_indices, action_indices, state_vars, action_vars) - - mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 1 0 0 1 0 0 1 0 0 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 0 1 0 0 1 0 0 1 0 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 
1//6 1//6 1//3 4//15 3//10 1//30 3//10 0 0 1 0 0 1 0 0 1 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 1 0 0 1 0 0 1 0 0 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 0 1 0 0 1 0 0 1 0 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 0 0 1 0 0 1 0 0 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 1 1 1 0 0 0 0 0 0 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 0 0 0 1 1 1 0 0 0 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 0 0 0 0 0 0 1 1 1 + ], + ), + upper = sparse( + N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 1 1 1 0 0 0 0 0 0 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 0 0 0 1 1 1 0 0 0 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 0 0 0 0 0 0 1 1 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 0 0 0 0 0 0 0 0 0 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 0 0 0 0 0 0 0 0 0 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 1 1 1 1 1 1 1 1 1 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 
1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 0 0 0 0 0 0 0 0 0 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 1 1 1 1 1 1 1 1 1 + ], + ), + ), + state_indices, + action_indices, + state_vars, + action_vars, + ) + + mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + (marginal1, marginal2, marginal3), + ) # Implicit - marginal1 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 - 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 - 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 - ]), - upper = sparse(N[ - 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 - 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 - 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal2 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 - 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 - 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 - ]), - upper = sparse(N[ - 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 - 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 - 17//30 3//5 8//15 1//2 7//15 1//2 2//3 
17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - marginal3 = Marginal(IntervalAmbiguitySets(; - lower = sparse(N[ - 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 - 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 - 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 - ]), - upper = sparse(N[ - 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 - 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 - 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 - ]) - ), state_indices, action_indices, source_dims, action_vars) - - implicit_mdp = FactoredRobustMarkovDecisionProcess(state_vars, action_vars, source_dims, (marginal1, marginal2, marginal3)) + marginal1 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//15 3//10 1//15 3//10 1//30 1//3 7//30 4//15 1//6 1//5 1//10 1//5 0 7//30 7//30 1//5 2//15 1//6 + 1//5 4//15 1//10 1//5 3//10 3//10 1//10 1//15 3//10 3//10 7//30 1//5 1//10 1//5 1//5 1//30 1//5 3//10 + 4//15 1//30 1//5 1//5 7//30 4//15 2//15 7//30 1//5 1//3 2//15 1//6 1//6 1//3 4//15 3//10 1//30 3//10 + ], + ), + upper = sparse( + N[ + 7//15 17//30 13//30 3//5 17//30 17//30 17//30 13//30 3//5 2//3 11//30 7//15 0 1//2 17//30 13//30 7//15 13//30 + 8//15 1//2 3//5 7//15 8//15 17//30 2//3 17//30 11//30 7//15 19//30 19//30 13//15 1//2 17//30 13//30 3//5 11//30 + 11//30 1//3 2//5 8//15 7//15 3//5 2//3 17//30 2//3 8//15 2//15 3//5 2//3 3//5 17//30 2//3 7//15 8//15 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal2 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 1//10 1//15 3//10 0 1//6 1//15 1//15 1//6 
1//6 1//30 1//10 1//10 1//3 2//15 3//10 4//15 2//15 2//15 + 3//10 1//5 3//10 2//15 0 1//30 0 1//15 1//30 7//30 1//30 1//15 7//30 1//15 1//6 1//30 1//10 1//15 + 3//10 4//15 1//10 3//10 2//15 1//3 3//10 1//10 1//6 3//10 7//30 1//6 1//15 1//15 1//10 1//5 1//5 4//15 + ], + ), + upper = sparse( + N[ + 2//5 17//30 3//5 11//30 3//5 7//15 19//30 2//5 3//5 2//3 2//3 8//15 8//15 19//30 8//15 8//15 13//30 13//30 + 1//3 13//30 11//30 2//5 2//3 2//3 0 13//30 1//2 17//30 17//30 1//3 2//5 1//3 13//30 11//30 8//15 1//3 + 17//30 3//5 8//15 1//2 7//15 1//2 2//3 17//30 11//30 2//5 1//2 7//15 2//5 17//30 11//30 2//5 11//30 2//3 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + marginal3 = Marginal( + IntervalAmbiguitySets(; + lower = sparse( + N[ + 4//15 1//5 3//10 3//10 4//15 7//30 1//5 4//15 7//30 1//6 1//5 0 1//15 1//30 3//10 1//3 2//15 1//15 + 2//15 4//15 1//10 1//30 7//30 2//15 1//15 1//30 3//10 1//3 1//5 1//10 2//15 1//30 2//15 4//15 0 4//15 + 1//5 1//3 3//10 1//10 1//15 1//10 1//30 1//5 2//15 7//30 1//3 2//15 1//10 1//6 3//10 1//5 7//30 1//30 + ], + ), + upper = sparse( + N[ + 3//5 17//30 1//2 3//5 19//30 2//5 8//15 1//3 11//30 2//5 17//30 13//30 2//5 3//5 3//5 11//30 1//2 11//30 + 3//5 2//3 13//30 19//30 1//3 2//5 17//30 7//15 11//30 3//5 19//30 7//15 2//5 8//15 17//30 11//30 19//30 13//30 + 3//5 2//3 1//2 1//2 2//3 7//15 3//5 3//5 1//2 1//3 2//5 8//15 2//5 11//30 1//3 8//15 7//15 13//30 + ], + ), + ), + state_indices, + action_indices, + source_dims, + action_vars, + ) + + implicit_mdp = FactoredRobustMarkovDecisionProcess( + state_vars, + action_vars, + source_dims, + (marginal1, marginal2, marginal3), + ) prop = FiniteTimeSafety([(i, j, 3) for i in 1:3 for j in 1:3], 10) spec = Specification(prop, Pessimistic, Maximize) @@ -1812,4 +1879,4 @@ using Random: MersenneTwister end end end -end \ No newline at end of file +end diff --git a/test/sparse/imdp.jl b/test/sparse/imdp.jl index 573e9677..bfae3ba9 100644 --- a/test/sparse/imdp.jl +++ 
b/test/sparse/imdp.jl @@ -1,17 +1,16 @@ using Revise, Test using IntervalMDP, SparseArrays - @testset for N in [Float32, Float64, Rational{BigInt}] prob1 = IntervalAmbiguitySets(; lower = sparse(N[ - 0 1//2 + 0 1//2 1//10 3//10 - 1//5 1//10 + 1//5 1//10 ]), upper = sparse(N[ - 1//2 7//10 - 3//5 1//2 + 1//2 7//10 + 3//5 1//2 7//10 3//10 ]), ) @@ -19,7 +18,7 @@ using IntervalMDP, SparseArrays prob2 = IntervalAmbiguitySets(; lower = sparse(N[ 1//10 1//5 - 1//5 3//10 + 1//5 3//10 3//10 2//5 ]), upper = sparse(N[ @@ -39,7 +38,7 @@ using IntervalMDP, SparseArrays 0 0 0 0 1 1 - ]) + ]), ) transition_probs = [prob1, prob2, prob3] @@ -53,11 +52,19 @@ using IntervalMDP, SparseArrays @testset "bellman" begin V = N[1, 2, 3] Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] Vres = similar(Vres) IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) - @test Vres ≈ N[(1//2) * 1 + (3//10) * 2 + (1//5) * 3, (3//10) * 1 + (3//10) * 2 + (2//5) * 3, 1 * 3] + @test Vres ≈ N[ + (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, + (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, + 1 * 3, + ] end @testset "explicit sink state" begin @@ -633,4 +640,4 @@ using IntervalMDP, SparseArrays @test res ≈ res_implicit end end -end \ No newline at end of file +end diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index 9cfb2af8..9ee97ab5 100644 --- a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -1,11 +1,5 @@ -test_files = [ - "bellman.jl", - "vi.jl", - "imdp.jl", - "synthesis.jl", - "factored.jl" -] +test_files = ["bellman.jl", "vi.jl", "imdp.jl", "synthesis.jl", "factored.jl"] for f in test_files @testset "sparse/$f" include(f) diff --git a/test/sparse/synthesis.jl b/test/sparse/synthesis.jl index a3a7fc1b..65f8fb50 100644 
--- a/test/sparse/synthesis.jl +++ b/test/sparse/synthesis.jl @@ -37,7 +37,7 @@ prob3 = IntervalAmbiguitySets(; 0.0 0.0 0.0 0.0 1.0 1.0 - ]) + ]), ) transition_probs = [prob1, prob2, prob3] diff --git a/test/sparse/vi.jl b/test/sparse/vi.jl index 1d4eae8d..27261a91 100644 --- a/test/sparse/vi.jl +++ b/test/sparse/vi.jl @@ -4,14 +4,14 @@ using IntervalMDP, SparseArrays @testset for N in [Float32, Float64, Rational{BigInt}] prob = IntervalAmbiguitySets(; lower = sparse_hcat( - SparseVector(3, [2, 3], N[1//10, 1//5]), - SparseVector(3, [1, 2, 3], N[1//2, 3//10, 1//10]), - SparseVector(3, [3], N[1//1]), + SparseVector(3, [2, 3], N[1 // 10, 1 // 5]), + SparseVector(3, [1, 2, 3], N[1 // 2, 3 // 10, 1 // 10]), + SparseVector(3, [3], N[1 // 1]), ), upper = sparse_hcat( - SparseVector(3, [1, 2, 3], N[1//2, 3//5, 7//10]), - SparseVector(3, [1, 2, 3], N[7//10, 1//2, 3//10]), - SparseVector(3, [3], N[1//1]), + SparseVector(3, [1, 2, 3], N[1 // 2, 3 // 5, 7 // 10]), + SparseVector(3, [1, 2, 3], N[7 // 10, 1 // 2, 3 // 10]), + SparseVector(3, [3], N[1 // 1]), ), ) @@ -124,4 +124,4 @@ using IntervalMDP, SparseArrays problem = VerificationProblem(mc, spec) V_conv, _, u = solve(problem) @test maximum(u) <= N(1//1_000_000) -end \ No newline at end of file +end From 5831fa2692f7f7b7dc4c7af6596d6f90bb7d0214 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 17:01:43 +0200 Subject: [PATCH 58/71] Update FormatCheck.yml --- .github/workflows/FormatCheck.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml index b7394d59..bd98b916 100644 --- a/.github/workflows/FormatCheck.yml +++ b/.github/workflows/FormatCheck.yml @@ -23,11 +23,8 @@ jobs: - uses: actions/checkout@v4 - name: Install JuliaFormatter and format - # This will use the latest version by default but you can set the version like so: - # - # julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", 
version="0.13.0"))' run: | - julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", version="1.0.62"))' + julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", version="2.1.6"))' julia -e 'using JuliaFormatter; format(".", verbose=true)' - name: Format check run: | From deb5651830957b06c86c82679d7762ac80a9950f Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 17:01:52 +0200 Subject: [PATCH 59/71] Update compat bounds for StyledStrings --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 33c6b2b7..d15ee8de 100644 --- a/Project.toml +++ b/Project.toml @@ -32,7 +32,7 @@ JSON = "0.21.4" JuMP = "1.29.0" LLVM = "7, 8, 9" NCDatasets = "0.13, 0.14" -StyledStrings = "1.11.0" +StyledStrings = "1.0.3" julia = "1.9" [extras] From 9b72a99121b0720749bf4a0a78da12896aad6c95 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 17:07:06 +0200 Subject: [PATCH 60/71] Fix tests --- test/base/synthesis.jl | 10 +++++----- test/sparse/synthesis.jl | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/base/synthesis.jl b/test/base/synthesis.jl index 52f9526a..e2576f4c 100644 --- a/test/base/synthesis.jl +++ b/test/base/synthesis.jl @@ -52,17 +52,17 @@ problem = ControlSynthesisProblem(mdp, spec) sol = solve(problem) policy, V, k, res = sol +@test strategy(sol) == policy +@test value_function(sol) == V +@test num_iterations(sol) == k +@test residual(sol) == res + @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) @test policy[k] == [(1,), (2,), (1,)] end -@test strategy(sol) == policy -@test value_function(sol) == V -@test num_iterations(sol) == k -@test residual(sol) == res - # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = 
solve(problem) diff --git a/test/sparse/synthesis.jl b/test/sparse/synthesis.jl index 65f8fb50..524feb52 100644 --- a/test/sparse/synthesis.jl +++ b/test/sparse/synthesis.jl @@ -52,17 +52,17 @@ problem = ControlSynthesisProblem(mdp, spec) sol = solve(problem) policy, V, k, res = sol +@test strategy(sol) == policy +@test value_function(sol) == V +@test num_iterations(sol) == k +@test residual(sol) == res + @test policy isa TimeVaryingStrategy @test time_length(policy) == 10 for k in 1:time_length(policy) @test policy[k] == [(1,), (2,), (1,)] end -@test strategy(sol) == policy -@test value_function(sol) == V -@test num_iterations(sol) == k -@test residual(sol) == res - # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = solve(problem) From ac220f141d7189e6075d386668b97b29c72af210 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 17:20:35 +0200 Subject: [PATCH 61/71] Bump compat of Julia to 1.11 (due to the use of the public kw) --- .github/workflows/CI.yml | 2 +- .github/workflows/FormatCheck.yml | 2 +- .github/workflows/documentation.yml | 2 +- Project.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 77bffd50..d6fc13ba 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,7 +26,7 @@ jobs: fail-fast: false matrix: version: - - '1.10' + - '1.11' - 'nightly' os: - ubuntu-latest diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml index bd98b916..05b6759c 100644 --- a/.github/workflows/FormatCheck.yml +++ b/.github/workflows/FormatCheck.yml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - julia-version: [1.10.0] + julia-version: [1.11.0] julia-arch: [x86] os: [ubuntu-latest] steps: diff --git a/.github/workflows/documentation.yml 
b/.github/workflows/documentation.yml index 2b4bc621..157a62eb 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: - version: '1.10' + version: '1.11' - uses: julia-actions/cache@v2 - name: Install dependencies run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' diff --git a/Project.toml b/Project.toml index d15ee8de..c4c4be78 100644 --- a/Project.toml +++ b/Project.toml @@ -33,7 +33,7 @@ JuMP = "1.29.0" LLVM = "7, 8, 9" NCDatasets = "0.13, 0.14" StyledStrings = "1.0.3" -julia = "1.9" +julia = "1.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 9a28d75ec899119960c256d6d491a879aa56b603 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 21:18:00 +0200 Subject: [PATCH 62/71] Test specification.jl more thoroughly --- test/base/specification.jl | 95 +++++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/test/base/specification.jl b/test/base/specification.jl index 8fee8601..dde0dfe5 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -9,11 +9,25 @@ using IntervalMDP @test reach(prop) == [3] + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("FiniteTimeDFAReachability", str) + @test occursin("Time horizon: 10", str) + @test occursin("Reach states: Int32[3]", str) + prop = InfiniteTimeDFAReachability([3], 1e-6) @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reach(prop) == [3] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("InfiniteTimeDFAReachability", str) + @test occursin("Convergence threshold: 1.0e-6", str) + @test occursin("Reach states: Int32[3]", str) end @testset "reachability" begin @@ -23,17 +37,38 @@ using IntervalMDP @test reach(prop) == 
[CartesianIndex(3)] + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("FiniteTimeReachability", str) + @test occursin("Time horizon: 10", str) + @test occursin("Reach states: CartesianIndex{1}[CartesianIndex(3,)]", str) + prop = InfiniteTimeReachability([3], 1e-6) @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reach(prop) == [CartesianIndex(3)] + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("InfiniteTimeReachability", str) + @test occursin("Convergence threshold: 1.0e-6", str) + @test occursin("Reach states: CartesianIndex{1}[CartesianIndex(3,)]", str) + prop = ExactTimeReachability([3], 10) @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [CartesianIndex(3)] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("ExactTimeReachability", str) + @test occursin("Time horizon: 10", str) + @test occursin("Reach states: CartesianIndex{1}[CartesianIndex(3,)]", str) end @testset "reach-avoid" begin @@ -44,6 +79,14 @@ using IntervalMDP @test reach(prop) == [CartesianIndex(3)] @test avoid(prop) == [CartesianIndex(4)] + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("FiniteTimeReachAvoid", str) + @test occursin("Time horizon: 10", str) + @test occursin("Reach states: CartesianIndex{1}[CartesianIndex(3,)]", str) + @test occursin("Avoid states: CartesianIndex{1}[CartesianIndex(4,)]", str) + prop = InfiniteTimeReachAvoid([3], [4], 1e-6) @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @@ -51,12 +94,28 @@ using IntervalMDP @test reach(prop) == [CartesianIndex(3)] @test avoid(prop) == [CartesianIndex(4)] + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("InfiniteTimeReachAvoid", str) + @test occursin("Convergence threshold: 1.0e-6", str) + @test 
occursin("Reach states: CartesianIndex{1}[CartesianIndex(3,)]", str) + @test occursin("Avoid states: CartesianIndex{1}[CartesianIndex(4,)]", str) + prop = ExactTimeReachAvoid([3], [4], 10) @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 @test reach(prop) == [CartesianIndex(3)] @test avoid(prop) == [CartesianIndex(4)] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("ExactTimeReachAvoid", str) + @test occursin("Time horizon: 10", str) + @test occursin("Reach states: CartesianIndex{1}[CartesianIndex(3,)]", str) + @test occursin("Avoid states: CartesianIndex{1}[CartesianIndex(4,)]", str) end @testset "safety" begin @@ -81,12 +140,28 @@ using IntervalMDP @test reward(prop) == [1.0, 2.0, 3.0] @test discount(prop) == 0.9 + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("FiniteTimeReward", str) + @test occursin("Time horizon: 10", str) + @test occursin("Reward storage: Float64, (3,)", str) + @test occursin("Discount factor: 0.9", str) + prop = InfiniteTimeReward([1.0, 2.0, 3.0], 0.9, 1e-6) @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test reward(prop) == [1.0, 2.0, 3.0] @test discount(prop) == 0.9 + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("InfiniteTimeReward", str) + @test occursin("Convergence threshold: 1.0e-6", str) + @test occursin("Reward storage: Float64, (3,)", str) + @test occursin("Discount factor: 0.9", str) end @testset "expected exit time" begin @@ -95,10 +170,18 @@ using IntervalMDP @test convergence_eps(prop) == 1e-6 @test avoid(prop) == [CartesianIndex(3)] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("ExpectedExitTime", str) + @test occursin("Convergence threshold: 1.0e-6", str) end end @testset "specification" begin + prop = FiniteTimeDFAReachability([3], 10) + # Default spec = Specification(prop) 
@test satisfaction_mode(spec) == Pessimistic @@ -317,7 +400,7 @@ end @test_throws ArgumentError VerificationProblem(mc, spec, tv_strat) end - # Convergence epsilon must be a positive number + # Convergence threshold must be a positive number @testset "convergence epsilon" begin prop = InfiniteTimeDFAReachability([2], 0.0) spec = Specification(prop) @@ -616,4 +699,14 @@ end spec = Specification(prop) @test_throws DimensionMismatch VerificationProblem(mc, spec) end + + @testset "incompatible model and specification" begin + prop = FiniteTimeDFAReachability([2], 10) + spec = Specification(prop) + @test_throws ArgumentError VerificationProblem(mc, spec) + + prop = FiniteTimeReachability([2], 10) + spec = Specification(prop) + @test_throws ArgumentError VerificationProblem(prod_proc, spec) + end end From c5c6fcd1f6ee1161e9d797a24bc7d18df3732279 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 21:41:01 +0200 Subject: [PATCH 63/71] Test show for AbstractSafety and Specification --- src/robust_value_iteration.jl | 3 ++- test/base/specification.jl | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index 3a2ebeb1..ae532665 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -1,7 +1,8 @@ abstract type TerminationCriteria end function termination_criteria(spec::Specification) prop = system_property(spec) - return termination_criteria(prop, Val(isfinitetime(prop))) + ft = isfinitetime(prop) + return termination_criteria(prop, Val(ft)) end struct FixedIterationsCriteria{T <: Integer} <: TerminationCriteria diff --git a/test/base/specification.jl b/test/base/specification.jl index dde0dfe5..a758b18e 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -125,11 +125,25 @@ using IntervalMDP @test avoid(prop) == [CartesianIndex(3)] + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = 
String(take!(io)) + @test occursin("FiniteTimeSafety", str) + @test occursin("Time horizon: 10", str) + @test occursin("Avoid states: CartesianIndex{1}[CartesianIndex(3,)]", str) + prop = InfiniteTimeSafety([3], 1e-6) @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 @test avoid(prop) == [CartesianIndex(3)] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("InfiniteTimeSafety", str) + @test occursin("Convergence threshold: 1.0e-6", str) + @test occursin("Avoid states: CartesianIndex{1}[CartesianIndex(3,)]", str) end @testset "reward" begin @@ -215,6 +229,14 @@ end @test satisfaction_mode(spec) == Optimistic @test strategy_mode(spec) == Minimize @test system_property(spec) == prop + + io = IOBuffer() + show(io, MIME("text/plain"), spec) + str = String(take!(io)) + @test occursin("Specification", str) + @test occursin("Satisfaction mode: Optimistic", str) + @test occursin("Strategy mode: Minimize", str) + @test occursin("Property: FiniteTimeDFAReachability", str) end ########## From 4de5afa11de110b4c1a08f1d1a3e801078f8c95d Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 22:05:40 +0200 Subject: [PATCH 64/71] Test IntervalAmbiguitySets --- src/probabilities/IntervalAmbiguitySets.jl | 16 +-- test/base/base.jl | 1 + test/base/probabilities.jl | 126 ++++++++++++++++++++ test/sparse/probabilities.jl | 129 +++++++++++++++++++++ test/sparse/sparse.jl | 2 +- 5 files changed, 263 insertions(+), 11 deletions(-) create mode 100644 test/base/probabilities.jl create mode 100644 test/sparse/probabilities.jl diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index 8ee4ce53..ebb294b8 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -99,6 +99,10 @@ function compute_gap( lower::MR, upper::MR, ) where {R, MR <: SparseArrays.AbstractSparseMatrixCSC{R}} + if size(lower) != 
size(upper) + throw(DimensionMismatch("The lower and upper matrices must have the same size.")) + end + I, J, _ = findnz(upper) gap_nonzeros = Vector{R}(undef, length(I)) @@ -126,7 +130,7 @@ end function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) if size(lower) != size(gap) - throw(ArgumentError("The lower and gap matrices must have the same size.")) + throw(DimensionMismatch("The lower and gap matrices must have the same size.")) end if any(lower .< 0) @@ -147,14 +151,6 @@ function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) throw(ArgumentError("The gap transition probabilities must be non-negative.")) end - if any(gap .> 1) - throw( - ArgumentError( - "The gap transition probabilities must be less than or equal to 1.", - ), - ) - end - if any(lower .+ gap .> 1) throw( ArgumentError( @@ -186,7 +182,7 @@ end function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) if size(lower) != size(gap) - throw(ArgumentError("The lower and gap matrices must have the same size.")) + throw(DimensionMismatch("The lower and gap matrices must have the same size.")) end if any(nonzeros(lower) .< 0) diff --git a/test/base/base.jl b/test/base/base.jl index 63bd9d4e..d26c65bd 100644 --- a/test/base/base.jl +++ b/test/base/base.jl @@ -1,5 +1,6 @@ test_files = [ + "probabilities.jl", "bellman.jl", "vi.jl", "imdp.jl", diff --git a/test/base/probabilities.jl b/test/base/probabilities.jl new file mode 100644 index 00000000..b14c2fd2 --- /dev/null +++ b/test/base/probabilities.jl @@ -0,0 +1,126 @@ +using Revise, Test +using IntervalMDP + +@testset for N in [Float32, Float64, Rational{BigInt}] + @testset "getters" begin + l = N[0 1//2; 1//10 3//10; 2//10 1//10] + u = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + + prob = IntervalAmbiguitySets(;lower = l, upper = u) + + @test length(prob) == 2 + @test num_sets(prob) == 2 + @test num_target(prob) == 3 + + res = sum(upper, prob) # Test iteration and upper + @test res == N[6//5, 
11//10, 1] + + io = IOBuffer() + show(io, MIME("text/plain"), prob) + str = String(take!(io)) + @test occursin("IntervalAmbiguitySets", str) + @test occursin("Storage type: Matrix{$N}", str) + @test occursin("Number of target states: 3", str) + @test occursin("Number of ambiguity sets: 2", str) + end + + @testset "vertex enumerator" begin + lower = N[0 1//2; 1//10 3//10; 2//10 1//10] + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + + prob = IntervalAmbiguitySets(;lower = lower, upper = upper) + + ambiguity_set = prob[1] # First ambiguity set + verts = IntervalMDP.vertices(ambiguity_set) + @test length(verts) == 6 + + expected_verts = N[ + 5//10 3//10 2//10 + 5//10 1//10 4//10 + 2//10 6//10 2//10 + 0 6//10 4//10 + 2//10 1//10 7//10 + 0 3//10 7//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + + ambiguity_set = prob[2] # Second ambiguity set + verts = IntervalMDP.vertices(ambiguity_set) + @test length(verts) <= 6 # = number of permutations of 3 elements + + expected_verts = N[ # duplicates due to budget < gap for all elements + 6 // 10 3//10 1//10 + 5//10 4//10 1//10 + 5//10 3//10 2//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + end + + @testset "check vs no check" begin + lower = N[0 1//2; 1//10 3//10; 2//10 1//10] + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + gap = upper - lower + + prob = IntervalAmbiguitySets(;lower = lower, upper = upper) + prob_no_check = IntervalAmbiguitySets(lower, gap, Val{false}()) + + @test prob.lower == prob_no_check.lower + @test prob.gap == prob_no_check.gap + end + + @testset "dimension mismatch" begin + lower = N[0 1//2; 1//10 3//10; 2//10 1//10] + upper = N[5//10 7//10; 6//10 5//10] # Wrong size + + @test_throws DimensionMismatch IntervalAmbiguitySets(;lower = lower, upper = upper) + + lower = N[0 1//2; 
1//10 3//10; 2//10 1//10] + gap = N[5//10 7//10; 6//10 5//10] # Wrong size + + @test_throws DimensionMismatch IntervalAmbiguitySets(lower, gap) + end + + @testset "negative lower bound" begin + lower = N[0 1//2; -1//10 3//10; 2//10 1//10] # Negative entry + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "lower bound greater than one" begin + lower = N[0 1//2; 1//10 3//10; 2//10 11//10] + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "lower greater than upper" begin + lower = N[0 1//2; 1//10 3//10; 2//10 1//10] + upper = N[5//10 7//10; 6//10 2//10; 7//10 3//10] # Lower bound greater than upper bound + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "upper bound greater than one" begin + lower = N[0 1//2; 1//10 3//10; 2//10 1//10] + upper = N[5//10 7//10; 6//10 5//10; 7//10 13//10] # Entry greater than 1 + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "sum lower greater than one" begin + lower = N[0 1//2; 1//10 3//10; 6//10 1//2] # Column sums to more than 1 + upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "sum upper less than one" begin + lower = N[0 1//2; 1//10 3//10; 2//10 1//10] + upper = N[1//10 7//10; 2//10 5//10; 3//10 6//10] # Column sums to less than 1 + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end +end \ No newline at end of file diff --git a/test/sparse/probabilities.jl b/test/sparse/probabilities.jl new file mode 100644 index 00000000..c2294854 --- /dev/null +++ b/test/sparse/probabilities.jl @@ -0,0 +1,129 @@ +using Revise, Test +using IntervalMDP, SparseArrays + +@testset for N in [Float32, 
Float64, Rational{BigInt}] + @testset "getters" begin + l = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + u = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + + prob = IntervalAmbiguitySets(;lower = l, upper = u) + + @test length(prob) == 2 + @test num_sets(prob) == 2 + @test num_target(prob) == 3 + + res = sum(upper, prob) # Test iteration and upper + @test res == N[6//5, 11//10, 1] + + io = IOBuffer() + show(io, MIME("text/plain"), prob) + str = String(take!(io)) + @test occursin("IntervalAmbiguitySets", str) + @test occursin("Storage type: ", str) + @test occursin("CSC{$N, Int64}", str) + @test occursin("Number of target states: 3", str) + @test occursin("Number of ambiguity sets: 2", str) + @test occursin("Maximum support size: 3", str) + @test occursin("Number of non-zeros: 6", str) + end + + @testset "vertex enumerator" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + + prob = IntervalAmbiguitySets(;lower = lower, upper = upper) + + ambiguity_set = prob[1] # First ambiguity set + verts = IntervalMDP.vertices(ambiguity_set) + @test length(verts) == 6 + + expected_verts = N[ + 5//10 3//10 2//10 + 5//10 1//10 4//10 + 2//10 6//10 2//10 + 0 6//10 4//10 + 2//10 1//10 7//10 + 0 3//10 7//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + + ambiguity_set = prob[2] # Second ambiguity set + verts = IntervalMDP.vertices(ambiguity_set) + @test length(verts) <= 6 # = number of permutations of 3 elements + + expected_verts = N[ # duplicates due to budget < gap for all elements + 6 // 10 3//10 1//10 + 5//10 4//10 1//10 + 5//10 3//10 2//10 + ] + @test length(verts) ≥ size(expected_verts, 1) # at least the unique ones + @test all(any(v2 -> v1 ≈ v2, verts) for v1 in eachrow(expected_verts)) + end + + @testset "check vs no check" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + upper = 
sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + gap = upper - lower + + prob = IntervalAmbiguitySets(;lower = lower, upper = upper) + prob_no_check = IntervalAmbiguitySets(lower, gap, Val{false}()) + + @test prob.lower == prob_no_check.lower + @test prob.gap == prob_no_check.gap + end + + @testset "dimension mismatch" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + upper = sparse(N[5//10 7//10; 6//10 5//10]) # Wrong size + + @test_throws DimensionMismatch IntervalAmbiguitySets(;lower = lower, upper = upper) + + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + gap = sparse(N[5//10 7//10; 6//10 5//10]) # Wrong size + + @test_throws DimensionMismatch IntervalAmbiguitySets(lower, gap) + end + + @testset "negative lower bound" begin + lower = sparse(N[0 1//2; -1//10 3//10; 2//10 1//10]) # Negative entry + upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "lower bound greater than one" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 11//10]) + upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "lower greater than upper" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + upper = sparse(N[5//10 7//10; 6//10 2//10; 7//10 3//10]) # Lower bound greater than upper bound + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "upper bound greater than one" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 13//10]) # Entry greater than 1 + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "sum lower greater than one" begin + lower = sparse(N[0 1//2; 1//10 3//10; 6//10 1//2]) # Column sums to more than 1 + upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + + 
@test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end + + @testset "sum upper less than one" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + upper = sparse(N[1//10 7//10; 2//10 5//10; 3//10 6//10]) # Column sums to less than 1 + + @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) + end +end \ No newline at end of file diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index 9ee97ab5..5cf8e3c7 100644 --- a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -1,5 +1,5 @@ -test_files = ["bellman.jl", "vi.jl", "imdp.jl", "synthesis.jl", "factored.jl"] +test_files = ["probabilities.jl", "bellman.jl", "vi.jl", "imdp.jl", "synthesis.jl", "factored.jl"] for f in test_files @testset "sparse/$f" include(f) From 108b31bb476a01117c16cbf6d451fb8623eeca52 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 22:08:43 +0200 Subject: [PATCH 65/71] Remove length checks from Marginal (already encoded in parametric tuple lengths) --- src/probabilities/Marginal.jl | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/probabilities/Marginal.jl b/src/probabilities/Marginal.jl index 1813567c..9ad8d1da 100644 --- a/src/probabilities/Marginal.jl +++ b/src/probabilities/Marginal.jl @@ -80,22 +80,6 @@ function checkindices( source_dims, action_vars, ) - if length(state_indices) != length(source_dims) - throw( - ArgumentError( - "Length of state indices must match length of source dimensions.", - ), - ) - end - - if length(action_indices) != length(action_vars) - throw( - ArgumentError( - "Length of action indices must match length of action dimensions.", - ), - ) - end - if any(state_indices .<= 0) throw(ArgumentError("State indices must be positive.")) end From 9d239e6b37c7c4ff1f77647afdb73dd45a871b75 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 1 Oct 2025 22:41:56 +0200 Subject: [PATCH 66/71] Fix and improve sanity checks for sparse 
IntervalAmbiguitySets --- src/probabilities/IntervalAmbiguitySets.jl | 10 +++++++++- test/base/probabilities.jl | 2 +- test/sparse/probabilities.jl | 14 +++++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/probabilities/IntervalAmbiguitySets.jl b/src/probabilities/IntervalAmbiguitySets.jl index ebb294b8..59e21fd6 100644 --- a/src/probabilities/IntervalAmbiguitySets.jl +++ b/src/probabilities/IntervalAmbiguitySets.jl @@ -180,11 +180,19 @@ function checkprobabilities(lower::AbstractMatrix, gap::AbstractMatrix) end end -function checkprobabilities!(lower::AbstractSparseMatrix, gap::AbstractSparseMatrix) +function checkprobabilities(lower::MR, gap::MR) where {R, MR <: AbstractSparseMatrix{R}} if size(lower) != size(gap) throw(DimensionMismatch("The lower and gap matrices must have the same size.")) end + if SparseArrays.getcolptr(lower) != SparseArrays.getcolptr(gap) + throw(DimensionMismatch("The lower and gap matrices must have the same column structure.")) + end + + if SparseArrays.rowvals(lower) != SparseArrays.rowvals(gap) + throw(DimensionMismatch("The lower and gap matrices must have the same row structure.")) + end + if any(nonzeros(lower) .< 0) throw( ArgumentError("The lower bound transition probabilities must be non-negative."), diff --git a/test/base/probabilities.jl b/test/base/probabilities.jl index b14c2fd2..0452c9c5 100644 --- a/test/base/probabilities.jl +++ b/test/base/probabilities.jl @@ -112,7 +112,7 @@ using IntervalMDP @testset "sum lower greater than one" begin lower = N[0 1//2; 1//10 3//10; 6//10 1//2] # Column sums to more than 1 - upper = N[5//10 7//10; 6//10 5//10; 7//10 3//10] + upper = N[5//10 7//10; 6//10 5//10; 7//10 1//2] @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) end diff --git a/test/sparse/probabilities.jl b/test/sparse/probabilities.jl index c2294854..46531f6b 100644 --- a/test/sparse/probabilities.jl +++ b/test/sparse/probabilities.jl @@ -85,6 +85,18 @@ using 
IntervalMDP, SparseArrays @test_throws DimensionMismatch IntervalAmbiguitySets(lower, gap) end + @testset "structure mismatch" begin + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + gap = sparse(N[5//10 7//10; 6//10 5//10; 0 0]) # Different colptr + + @test_throws DimensionMismatch IntervalAmbiguitySets(lower, gap) + + lower = sparse(N[0 1//2; 1//10 3//10; 2//10 1//10]) + gap = sparse(N[5//10 7//10; 6//10 5//10; 0 1//10]) # Same colptr but different rowvals + + @test_throws DimensionMismatch IntervalAmbiguitySets(lower, gap) + end + @testset "negative lower bound" begin lower = sparse(N[0 1//2; -1//10 3//10; 2//10 1//10]) # Negative entry upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) @@ -115,7 +127,7 @@ using IntervalMDP, SparseArrays @testset "sum lower greater than one" begin lower = sparse(N[0 1//2; 1//10 3//10; 6//10 1//2]) # Column sums to more than 1 - upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 3//10]) + upper = sparse(N[5//10 7//10; 6//10 5//10; 7//10 1//2]) @test_throws ArgumentError IntervalAmbiguitySets(;lower = lower, upper = upper) end From ef5dffe4244befa3cd42b9a56df72c137a92f289 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 13 Oct 2025 14:57:49 +0200 Subject: [PATCH 67/71] Add DFA Safety properties --- docs/src/reference/specifications.md | 12 +++ docs/src/specifications.md | 2 +- src/IntervalMDP.jl | 1 + src/specification.jl | 136 ++++++++++++++++++++++++++- test/base/product.jl | 45 +++++++++ test/base/specification.jl | 80 ++++++++++++++++ 6 files changed, 274 insertions(+), 2 deletions(-) diff --git a/docs/src/reference/specifications.md b/docs/src/reference/specifications.md index e03cc35d..858e698e 100644 --- a/docs/src/reference/specifications.md +++ b/docs/src/reference/specifications.md @@ -93,4 +93,16 @@ time_horizon(prop::FiniteTimeDFAReachability) InfiniteTimeDFAReachability reach(prop::InfiniteTimeDFAReachability) convergence_eps(prop::InfiniteTimeDFAReachability) +``` + +## DFA Safety + 
+```@docs +FiniteTimeDFASafety +avoid(prop::FiniteTimeDFASafety) +time_horizon(prop::FiniteTimeDFASafety) + +InfiniteTimeDFASafety +avoid(prop::InfiniteTimeDFASafety) +convergence_eps(prop::InfiniteTimeDFASafety) ``` \ No newline at end of file diff --git a/docs/src/specifications.md b/docs/src/specifications.md index 63863d01..cf70b6d9 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -287,7 +287,7 @@ initial_state = 1 dfa = DFA(delta, initial_state, atomic_props) ``` -Notice that the DFA does not include the set of accepting states. This is because the accepting states does not impact the Bellman operator and therefore are defined in `DFAReachability` objects, which is shown below. +Notice that the DFA does not include the set of accepting states. This is because the accepting states do not impact the Bellman operator and therefore are defined in `DFAReachability` objects, which is shown below. ```@example using IntervalMDP # hide diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 261e0a8a..65d018ca 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -28,6 +28,7 @@ include("specification.jl") export Property, BasicProperty, ProductProperty export FiniteTimeDFAReachability, InfiniteTimeDFAReachability +export FiniteTimeDFASafety, InfiniteTimeDFASafety export FiniteTimeReachability, InfiniteTimeReachability, ExactTimeReachability export FiniteTimeReachAvoid, InfiniteTimeReachAvoid, ExactTimeReachAvoid export FiniteTimeSafety, InfiniteTimeSafety diff --git a/src/specification.jl b/src/specification.jl index cf805fa3..cc1b683a 100644 --- a/src/specification.jl +++ b/src/specification.jl @@ -77,7 +77,7 @@ end """ AbstractDFAReachability -Super type for all reachability-like properties. +Super type for all DFA reachability-like properties. 
""" abstract type AbstractDFAReachability <: ProductProperty end @@ -204,6 +204,140 @@ function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeDFAReachab println(io, prefix, styled"└─ Reach states: {magenta:$(reach(prop))}") end +## DFA Safety + +""" + AbstractDFASafety + +Super type for all DFA safety-like properties. +""" +abstract type AbstractDFASafety <: ProductProperty end + +function initialize!(value_function, prop::AbstractDFASafety) + @inbounds selectdim( + value_function.current, + ndims(value_function.current), + avoid(prop), + ) .= -1.0 +end + +function step_postprocess_value_function!(value_function, prop::AbstractDFASafety) + @inbounds selectdim( + value_function.current, + ndims(value_function.current), + avoid(prop), + ) .= -1.0 +end + +function postprocess_value_function!(value_function, ::AbstractDFASafety) + value_function.current .+= 1.0 +end + +""" + FiniteTimeDFASafety{VT <: Vector{<:Integer}, T <: Integer} + +Finite time Safety specified by a set of target/terminal states and a time horizon. +That is, denote a trace by ``z_1 z_2 z_3 \\cdots`` with ``z_k = (s_k, q_k)`` then if ``T`` is the set of target states and ``H`` is the time horizon, +the property is +```math + \\mathbb{P}(\\exists k = \\{0, \\ldots, H\\}, q_k \\in T). +``` +""" +struct FiniteTimeDFASafety{VT <: Vector{<:Int32}, T <: Integer} <: + AbstractDFASafety + avoid::VT + time_horizon::T +end + +function FiniteTimeDFASafety(avoid::Vector{<:Integer}, time_horizon) + avoid = Int32.(avoid) + return FiniteTimeDFASafety(avoid, time_horizon) +end + +function checkproperty(prop::FiniteTimeDFASafety, system, strategy) + checktimehorizon(prop, strategy) + checkproperty(prop, system) +end + +function checkproperty(prop::FiniteTimeDFASafety, system) + checkstatebounds(avoid(prop), system) +end + +isfinitetime(prop::FiniteTimeDFASafety) = true + +""" + time_horizon(prop::FiniteTimeDFASafety) + +Return the time horizon of a finite time DFA safety property. 
+""" +time_horizon(prop::FiniteTimeDFASafety) = prop.time_horizon + +""" + avoid(prop::FiniteTimeDFASafety) + +Return the set of DFA states with respect to which to compute safety for a finite time DFA safety property. +""" +avoid(prop::FiniteTimeDFASafety) = prop.avoid + +function showproperty(io::IO, first_prefix, prefix, prop::FiniteTimeDFASafety) + println(io, first_prefix, styled"{code:FiniteTimeDFASafety}") + println(io, prefix, styled"├─ Time horizon: {magenta:$(time_horizon(prop))}") + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") +end + +""" + InfiniteTimeDFASafety{VT <: Vector{<:Integer}, R <: Real} + +`InfiniteTimeDFASafety` is similar to [`FiniteTimeDFASafety`](@ref) except that the time horizon is infinite, i.e., ``H = \\infty``. +In practice it means, performing the value iteration until the value function has converged, defined by some threshold `convergence_eps`. +The convergence threshold is that the largest value of the most recent Bellman residual is less than `convergence_eps`. +""" +struct InfiniteTimeDFASafety{VT <: Vector{<:Int32}, R <: Real} <: + AbstractDFASafety + avoid::VT + convergence_eps::R +end + +function InfiniteTimeDFASafety(avoid::Vector{<:Integer}, convergence_eps) + avoid = Int32.(avoid) + return InfiniteTimeDFASafety(avoid, convergence_eps) +end + +function checkproperty(prop::InfiniteTimeDFASafety, system, strategy) + checkconvergence(prop, strategy) + checkproperty(prop, system) +end + +function checkproperty(prop::InfiniteTimeDFASafety, system) + checkstatebounds(avoid(prop), system) +end + +isfinitetime(prop::InfiniteTimeDFASafety) = false + +""" + convergence_eps(prop::InfiniteTimeDFASafety) + +Return the convergence threshold of an infinite time DFA safety property. +""" +convergence_eps(prop::InfiniteTimeDFASafety) = prop.convergence_eps + +""" + avoid(prop::InfiniteTimeDFASafety) + +Return the set of DFA states with respect to which to compute safety for a infinite time DFA safety property. 
+""" +avoid(prop::InfiniteTimeDFASafety) = prop.avoid + +function showproperty(io::IO, first_prefix, prefix, prop::InfiniteTimeDFASafety) + println(io, first_prefix, styled"{code:InfiniteTimeDFASafety}") + println( + io, + prefix, + styled"├─ Convergence threshold: {magenta:$(convergence_eps(prop))}", + ) + println(io, prefix, styled"└─ Avoid states: {magenta:$(avoid(prop))}") +end + ## Reachability """ diff --git a/test/base/product.jl b/test/base/product.jl index f8c23c0e..bcd92478 100644 --- a/test/base/product.jl +++ b/test/base/product.jl @@ -226,6 +226,51 @@ end @test V_conv ≈ V_mc end + + @testset "finite time safety" begin + prop = FiniteTimeDFASafety([2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(prod_proc, spec) + + policy, V_fixed_it1, k, res = solve(problem) + + @test all(V_fixed_it1 .>= 0) + @test k == 10 + @test V_fixed_it1[:, 2] == N[1, 1, 1] + + problem = VerificationProblem(prod_proc, spec, policy) + V_mc, k, res = solve(problem) + + @test V_fixed_it1 ≈ V_mc + + prop = FiniteTimeDFASafety([2], 11) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(prod_proc, spec) + + V_fixed_it2, k, res = solve(problem) + + @test all(V_fixed_it2 .>= 0) + @test k == 11 + @test V_fixed_it2[:, 2] == N[1, 1, 1] + @test all(V_fixed_it2 .<= V_fixed_it1) + end + + @testset "infinite time safety" begin + prop = InfiniteTimeDFASafety([2], 1e-3) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(prod_proc, spec) + + policy, V_conv, k, res = solve(problem) + + @test all(V_conv .>= 0) + @test maximum(res) <= 1e-3 + @test V_conv[:, 2] == N[1, 1, 1] + + problem = VerificationProblem(prod_proc, spec, policy) + V_mc, k, res = solve(problem) + + @test V_conv ≈ V_mc + end end end end diff --git a/test/base/specification.jl b/test/base/specification.jl index a758b18e..0af502b5 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -30,6 +30,34 
@@ using IntervalMDP @test occursin("Reach states: Int32[3]", str) end + @testset "DFA safety" begin + prop = FiniteTimeDFASafety([3], 10) + @test IntervalMDP.isfinitetime(prop) + @test time_horizon(prop) == 10 + + @test reach(prop) == [3] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("FiniteTimeDFASafety", str) + @test occursin("Time horizon: 10", str) + @test occursin("Avoid states: Int32[3]", str) + + prop = InfiniteTimeDFASafety([3], 1e-6) + @test !IntervalMDP.isfinitetime(prop) + @test convergence_eps(prop) == 1e-6 + + @test reach(prop) == [3] + + io = IOBuffer() + show(io, MIME("text/plain"), prop) + str = String(take!(io)) + @test occursin("InfiniteTimeDFASafety", str) + @test occursin("Convergence threshold: 1.0e-6", str) + @test occursin("Avoid states: Int32[3]", str) + end + @testset "reachability" begin prop = FiniteTimeReachability([3], 10) @test IntervalMDP.isfinitetime(prop) @@ -290,6 +318,18 @@ end spec = Specification(prop) @test_throws DomainError VerificationProblem(prod_proc, spec, tv_prod_strat) + prop = FiniteTimeDFASafety([2], 0) + spec = Specification(prop) + @test_throws DomainError VerificationProblem(prod_proc, spec) + + prop = FiniteTimeDFASafety([2], -1) + spec = Specification(prop) + @test_throws DomainError VerificationProblem(prod_proc, spec) + + prop = FiniteTimeDFASafety([2], 0) + spec = Specification(prop) + @test_throws DomainError VerificationProblem(prod_proc, spec, tv_prod_strat) + prop = FiniteTimeReachability([3], 0) spec = Specification(prop) @test_throws DomainError VerificationProblem(mc, spec) @@ -373,6 +413,14 @@ end spec = Specification(prop) @test_throws ArgumentError VerificationProblem(prod_proc, spec, tv_prod_strat) + prop = FiniteTimeDFASafety([2], 2) + spec = Specification(prop) + @test_throws ArgumentError VerificationProblem(prod_proc, spec, tv_prod_strat) + + prop = FiniteTimeDFASafety([2], 4) + spec = Specification(prop) + @test_throws ArgumentError 
VerificationProblem(prod_proc, spec, tv_prod_strat) + prop = FiniteTimeReachability([3], 2) spec = Specification(prop) @test_throws ArgumentError VerificationProblem(mc, spec, tv_strat) @@ -432,6 +480,14 @@ end spec = Specification(prop) @test_throws DomainError VerificationProblem(prod_proc, spec) + prop = InfiniteTimeDFASafety([2], 0.0) + spec = Specification(prop) + @test_throws DomainError VerificationProblem(prod_proc, spec) + + prop = InfiniteTimeDFASafety([2], -1e-3) + spec = Specification(prop) + @test_throws DomainError VerificationProblem(prod_proc, spec) + prop = InfiniteTimeReachability([3], 0.0) spec = Specification(prop) @test_throws DomainError VerificationProblem(mc, spec) @@ -478,6 +534,10 @@ end prop = InfiniteTimeDFAReachability([2], 1e-6) spec = Specification(prop) @test_throws ArgumentError VerificationProblem(prod_proc, spec, tv_prod_strat) + + prop = InfiniteTimeDFASafety([2], 1e-6) + spec = Specification(prop) + @test_throws ArgumentError VerificationProblem(prod_proc, spec, tv_prod_strat) prop = InfiniteTimeReachability([3], 1e-6) spec = Specification(prop) @@ -513,6 +573,16 @@ end @test_throws InvalidStateError VerificationProblem(prod_proc, spec) end + @testset "DFA safety" begin + prop = FiniteTimeDFASafety([3], 10) # out-of-bounds + spec = Specification(prop) + @test_throws InvalidStateError VerificationProblem(prod_proc, spec) + + prop = FiniteTimeDFASafety([0], 10) # out-of-bounds + spec = Specification(prop) + @test_throws InvalidStateError VerificationProblem(prod_proc, spec) + end + @testset "reachability" begin prop = FiniteTimeReachability([4], 10) # out-of-bounds spec = Specification(prop) @@ -621,6 +691,16 @@ end @test_throws InvalidStateError VerificationProblem(prod_proc, spec) end + @testset "DFA safety" begin + prop = InfiniteTimeDFASafety([3], 1e-6) # out-of-bounds + spec = Specification(prop) + @test_throws InvalidStateError VerificationProblem(prod_proc, spec) + + prop = InfiniteTimeDFASafety([0], 1e-6) # out-of-bounds + 
spec = Specification(prop) + @test_throws InvalidStateError VerificationProblem(prod_proc, spec) + end + @testset "reachability" begin prop = InfiniteTimeReachability([4], 1e-6) # out-of-bounds spec = Specification(prop) From b4c37ca0516cff209bdc4bec06467380bb043129 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 15 Oct 2025 17:08:47 +0200 Subject: [PATCH 68/71] Fix initialization of extract_strategy! --- ext/cuda/bellman/dense.jl | 1 - ext/cuda/bellman/sparse.jl | 2 -- ext/cuda/strategy.jl | 5 +---- src/bellman.jl | 8 ++++---- src/strategy_cache.jl | 6 ++---- 5 files changed, 7 insertions(+), 15 deletions(-) diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index 0a250699..3c5fbd28 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -238,7 +238,6 @@ end v = extract_strategy_warp!( strategy_cache, action_workspace, - Vres, jₛ, action_reduce, lane, diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index 1dbb5516..0ff30716 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -318,7 +318,6 @@ end v = extract_strategy_warp!( strategy_cache, action_workspace, - Vres, jₛ, action_reduce, lane, @@ -710,7 +709,6 @@ end v = extract_strategy_warp!( strategy_cache, action_workspace, - Vres, jₛ, action_reduce, lane, diff --git a/ext/cuda/strategy.jl b/ext/cuda/strategy.jl index add95523..065850dc 100644 --- a/ext/cuda/strategy.jl +++ b/ext/cuda/strategy.jl @@ -41,7 +41,6 @@ Base.@propagate_inbounds Base.getindex(cache::GivenStrategyActiveCache, j) = @inline function extract_strategy_warp!( ::NoStrategyActiveCache, values::AbstractVector{Tv}, - V, j, action_reduce, lane, @@ -71,7 +70,6 @@ end @inline function extract_strategy_warp!( cache::TimeVaryingStrategyActiveCache{1, <:AbstractVector{Tuple{Int32}}}, values::AbstractVector{Tv}, - V, jₛ, action_reduce, lane, @@ -106,7 +104,6 @@ end @inline function extract_strategy_warp!( cache::StationaryStrategyActiveCache{1, 
<:AbstractVector{Tuple{Int32}}}, values::AbstractVector{Tv}, - V, jₛ, action_reduce, lane, @@ -118,7 +115,7 @@ end opt_val, opt_idx = if iszero(cache.strategy[jₛ][1]) action_neutral, one(Int32) else - V[jₛ], Int32(cache.strategy[jₛ][1]) + values[jₛ], Int32(cache.strategy[jₛ][1]) end s = lane diff --git a/src/bellman.jl b/src/bellman.jl index b5709874..d563c687 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -362,7 +362,7 @@ function state_bellman!( state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) end - Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, jₛ, maximize) end end @@ -510,7 +510,7 @@ function state_bellman!( state_action_bellman(workspace, V, ambiguity_sets, upper_bound) end - Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, jₛ, maximize) end end @@ -706,7 +706,7 @@ function state_bellman!( ) end - Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, jₛ, maximize) end end @@ -854,7 +854,7 @@ function state_bellman!( state_action_bellman(workspace, V, ambiguity_sets, upper_bound) end - Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, V, jₛ, maximize) + Vres[jₛ] = extract_strategy!(strategy_cache, workspace.actions, jₛ, maximize) end end diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index caef88f6..ea8cda1e 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -20,7 +20,7 @@ end construct_strategy_cache(::VerificationProblem{S, F, <:NoStrategy}) where {S, F} = NoStrategyCache() -function extract_strategy!(::NoStrategyCache, values, V, j, maximize) +function extract_strategy!(::NoStrategyCache, values, j, maximize) return maximize ? 
maximum(values) : minimum(values) end step_postprocess_strategy_cache!(::NoStrategyCache) = nothing @@ -75,7 +75,6 @@ cachetostrategy(strategy_cache::TimeVaryingStrategyCache) = function extract_strategy!( strategy_cache::TimeVaryingStrategyCache, values::AbstractArray{R}, - V, jₛ, maximize, ) where {R <: Real} @@ -112,14 +111,13 @@ cachetostrategy(strategy_cache::StationaryStrategyCache) = function extract_strategy!( strategy_cache::StationaryStrategyCache, values::AbstractArray{R}, - V, jₛ, maximize, ) where {R <: Real} neutral = if all(iszero.(strategy_cache.strategy[jₛ])) maximize ? typemin(R) : typemax(R), 1 else - V[jₛ], strategy_cache.strategy[jₛ] + values[jₛ], strategy_cache.strategy[jₛ] end return _extract_strategy!(strategy_cache.strategy, values, neutral, jₛ, maximize) From 4160d8d289effbe18bf534dc335524de8ceb7b1e Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 15 Oct 2025 17:09:11 +0200 Subject: [PATCH 69/71] Add appropriate tolerances to tests --- test/base/product.jl | 12 ++++++------ test/base/specification.jl | 4 ++-- test/base/synthesis.jl | 2 +- test/sparse/synthesis.jl | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/base/product.jl b/test/base/product.jl index bcd92478..a0c17099 100644 --- a/test/base/product.jl +++ b/test/base/product.jl @@ -170,7 +170,7 @@ end mdp = IntervalMarkovDecisionProcess(transition_probs) # Product model - just simple reachability - delta = TransitionFunction(Int32[ + delta = TransitionFunction(Int32[ # Labels on rows 1 2 2 2 ]) @@ -224,7 +224,7 @@ end problem = VerificationProblem(prod_proc, spec, policy) V_mc, k, res = solve(problem) - @test V_conv ≈ V_mc + @test V_conv ≈ V_mc atol=1e-3 end @testset "finite time safety" begin @@ -236,7 +236,7 @@ end @test all(V_fixed_it1 .>= 0) @test k == 10 - @test V_fixed_it1[:, 2] == N[1, 1, 1] + @test V_fixed_it1[:, 2] == N[0, 0, 0] problem = VerificationProblem(prod_proc, spec, policy) V_mc, k, res = solve(problem) @@ -251,7 +251,7 
@@ end @test all(V_fixed_it2 .>= 0) @test k == 11 - @test V_fixed_it2[:, 2] == N[1, 1, 1] + @test V_fixed_it2[:, 2] == N[0, 0, 0] @test all(V_fixed_it2 .<= V_fixed_it1) end @@ -264,12 +264,12 @@ end @test all(V_conv .>= 0) @test maximum(res) <= 1e-3 - @test V_conv[:, 2] == N[1, 1, 1] + @test V_conv[:, 2] == N[0, 0, 0] problem = VerificationProblem(prod_proc, spec, policy) V_mc, k, res = solve(problem) - @test V_conv ≈ V_mc + @test V_conv ≈ V_mc atol=1e-3 end end end diff --git a/test/base/specification.jl b/test/base/specification.jl index 0af502b5..75805443 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -35,7 +35,7 @@ using IntervalMDP @test IntervalMDP.isfinitetime(prop) @test time_horizon(prop) == 10 - @test reach(prop) == [3] + @test avoid(prop) == [3] io = IOBuffer() show(io, MIME("text/plain"), prop) @@ -48,7 +48,7 @@ using IntervalMDP @test !IntervalMDP.isfinitetime(prop) @test convergence_eps(prop) == 1e-6 - @test reach(prop) == [3] + @test avoid(prop) == [3] io = IOBuffer() show(io, MIME("text/plain"), prop) diff --git a/test/base/synthesis.jl b/test/base/synthesis.jl index e2576f4c..5536280a 100644 --- a/test/base/synthesis.jl +++ b/test/base/synthesis.jl @@ -97,7 +97,7 @@ policy, V, k, res = solve(problem) # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = solve(problem) -@test V ≈ V_mc +@test V ≈ V_mc atol=1e-6 # Finite time safety prop = FiniteTimeSafety([3], 10) diff --git a/test/sparse/synthesis.jl b/test/sparse/synthesis.jl index 524feb52..e26e99f8 100644 --- a/test/sparse/synthesis.jl +++ b/test/sparse/synthesis.jl @@ -97,7 +97,7 @@ policy, V, k, res = solve(problem) # Check if the value iteration for the IMDP with the policy applied is the same as the value iteration for the original IMDP problem = VerificationProblem(mdp, spec, policy) V_mc, k, res = solve(problem) -@test V ≈ V_mc 
+@test V ≈ V_mc atol=1e-6 # Finite time safety prop = FiniteTimeSafety([3], 10) From 40be71f86bef8bf06182020cadbdfdbcb6abb765 Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Wed, 15 Oct 2025 17:41:28 +0200 Subject: [PATCH 70/71] Fix extract_strategy! for StationaryStrategyCache --- ext/cuda/strategy.jl | 3 ++- src/strategy_cache.jl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/cuda/strategy.jl b/ext/cuda/strategy.jl index 065850dc..a389e0ca 100644 --- a/ext/cuda/strategy.jl +++ b/ext/cuda/strategy.jl @@ -115,7 +115,8 @@ end opt_val, opt_idx = if iszero(cache.strategy[jₛ][1]) action_neutral, one(Int32) else - values[jₛ], Int32(cache.strategy[jₛ][1]) + s = cache.strategy[jₛ][1] + values[s], s end s = lane diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl index ea8cda1e..6761dd59 100644 --- a/src/strategy_cache.jl +++ b/src/strategy_cache.jl @@ -117,7 +117,8 @@ function extract_strategy!( neutral = if all(iszero.(strategy_cache.strategy[jₛ])) maximize ? 
typemin(R) : typemax(R), 1 else - values[jₛ], strategy_cache.strategy[jₛ] + s = strategy_cache.strategy[jₛ] + values[CartesianIndex(s)], s end return _extract_strategy!(strategy_cache.strategy, values, neutral, jₛ, maximize) From c1544106a8a9d05fa66f3b73ebb7379831f40f9f Mon Sep 17 00:00:00 2001 From: Frederik Baymler Mathiesen Date: Mon, 20 Oct 2025 12:08:51 +0200 Subject: [PATCH 71/71] Change CUDA compat bounds --- Project.toml | 2 +- test/Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index c4c4be78..05f18c3b 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ IntervalMDPCudaExt = ["Adapt", "CUDA", "GPUArrays", "LLVM"] [compat] Adapt = "4" -CUDA = "5.1" +CUDA = "5.9.1" CommonSolve = "0.2.4" GPUArrays = "10, 11" HiGHS = "1.19.0" diff --git a/test/Project.toml b/test/Project.toml index 04330c91..a5aeb5cf 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -12,7 +12,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -CUDA = "5.1.2" +CUDA = "5.9.1" StatsBase = "0.34.2" julia = "1.9"