From 9938ff0de06026532171ee8154ce6a94b4c4028e Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Mon, 31 Jul 2017 14:41:32 -0700 Subject: [PATCH] ENH: added routines to convert ddp between full and SA formulations --- quantecon/markov/ddp.py | 76 ++++++++++++++++++++++++++++++ quantecon/markov/tests/test_ddp.py | 57 ++++++++++++++++------ 2 files changed, 119 insertions(+), 14 deletions(-) diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index b6176e886..5aff0857b 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -456,6 +456,73 @@ def _check_action_feasibility(self): 'violated for state {s}'.format(s=s) ) + def to_sa_formulation(self, sparse=True): + """ + Convert this instance of `DiscreteDP` to SA-pair form + + Parameters + ---------- + sparse : bool, optional(default=True) + Should the `Q` matrix be stored as a sparse matrix. + If true the CSR format is used + + Returns + ------- + ddp_sa : DiscreteDP + The correspnoding DiscreteDP instance in SA form + + Notes + ----- + If this instance is already in SA form then it is returned + un-modified + """ + + if self._sa_pair: + return self + else: + s_ind, a_ind = np.where(np.isfinite(self.R)) + RL = self.R[s_ind, a_ind] + if sparse: + QL = sp.csr_matrix(self.Q[s_ind, a_ind]) + else: + QL = self.Q[s_ind, a_ind] + return DiscreteDP(RL, QL, self.beta, s_ind, a_ind) + + def to_full_formulation(self): + """ + Convert this instance of `DiscreteDP` to the "full" form. + + The full form uses the version of the init method taking + `R`, `Q` and `beta`. + + Parameters + ---------- + + Returns + ------- + ddp_sa : DiscreteDP + The correspnoding DiscreteDP instance in full form + + Notes + ----- + If this instance is already in full form then it is returned + un-modified + + """ + if self._sa_pair: + ns = self.num_states + na = self.a_indices.max() + 1 + R = np.zeros((ns, na)) + R[self.s_indices, self.a_indices] = self.R + Q = np.zeros((ns, na, ns)) + if self._sparse: + _fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q) + else: + _fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q) + return DiscreteDP(R, Q, self.beta) + else: + return self + def RQ_sigma(self, sigma): """ Given a policy `sigma`, return the reward vector `R_sigma` and @@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None): return vs, sigmas +@jit(nopython=True) +def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out): + L = Q_in.shape[0] + for i in range(L): + Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :] + + return Q_out + + @jit(nopython=True) def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax): n = len(out_max) diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py index 3a62e43a1..dc0f94672 100644 --- a/quantecon/markov/tests/test_ddp.py +++ b/quantecon/markov/tests/test_ddp.py @@ -10,7 +10,7 @@ import numpy as np import scipy.sparse as sparse from numpy.testing import assert_array_equal, assert_allclose, assert_raises -from nose.tools import eq_, ok_ +from nose.tools import ok_ from quantecon.markov import DiscreteDP, backward_induction @@ -126,10 +126,7 @@ def test_ddp_beta_0(): v_init = [0, 0, 0] ddp0 = DiscreteDP(R, Q, beta) - s_indices, a_indices = np.where(R > -np.inf) - R_sa = R[s_indices, a_indices] - Q_sa = Q[s_indices, a_indices] - ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices) + ddp1 = ddp0.to_sa_formulation() methods = ['vi', 'pi', 'mpi'] for ddp in [ddp0, ddp1]: @@ -140,7 +137,6 @@ def test_ddp_beta_0(): def test_ddp_sorting(): - n, m = 2, 2 beta = 0.95 # Sorted @@ -238,8 +234,6 @@ def test_ddp_negative_inf_error(): def test_ddp_no_feasibile_action_error(): - n, m = 3, 2 - # No action is feasible at state 1 s_indices = [0, 0, 2, 2] a_indices = [0, 1, 0, 1] @@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error(): beta = 1 ddp0 = DiscreteDP(R, Q, beta) - s_indices, a_indices = np.where(R > -np.inf) - R_sa = R[s_indices, a_indices] - Q_sa = Q[s_indices, a_indices] - ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices) - Q_sa_sp = sparse.csr_matrix(Q_sa) - ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices) + ddp1 = ddp0.to_sa_formulation() + ddp2 = ddp0.to_sa_formulation(sparse=False) solution_methods = \ ['value_iteration', 'policy_iteration', 'modified_policy_iteration'] @@ -274,6 +264,45 @@ def test_ddp_beta_1_not_implemented_error(): assert_raises(NotImplementedError, getattr(ddp, method)) +def test_ddp_to_sa_and_to_full(): + n, m = 3, 2 + R = np.array([[0, 1], [1, 0], [0, 1]]) + Q = np.empty((n, m, n)) + Q[:] = 1/n + beta = 0.95 + + sparse_R = np.array([0, 1, 1, 0, 0, 1]) + sparse_Q = sparse.coo_matrix(np.full((6, 3), 1/3)) + + ddp = DiscreteDP(R, Q, beta) + ddp_sa = ddp.to_sa_formulation() + ddp_sa2 = ddp_sa.to_sa_formulation() + ddp_sa3 = ddp.to_sa_formulation(sparse=False) + ddp2 = ddp_sa.to_full_formulation() + ddp3 = ddp_sa2.to_full_formulation() + ddp4 = ddp.to_full_formulation() + + # make sure conversion worked + for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]: + assert_allclose(ddp_s.R, sparse_R) + # allcose doesn't work on sparse + np.max(np.abs((sparse_Q - ddp_sa.Q))) < 1e-15 + assert_allclose(ddp_s.beta, beta) + + for ddp_f in [ddp2, ddp3, ddp4]: + assert_allclose(ddp_f.R, ddp.R) + assert_allclose(ddp_f.Q, ddp.Q) + assert_allclose(ddp_f.beta, ddp.beta) + + for method in ["pi", "vi", "mpi"]: + sol1 = ddp.solve(method=method) + for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]: + sol2 = ddp_other.solve(method=method) + + for k in ["v", "sigma", "num_iter"]: + assert_allclose(sol1[k], sol2[k]) + + if __name__ == '__main__': import sys import nose