diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py index b6176e886..ea4407675 100644 --- a/quantecon/markov/ddp.py +++ b/quantecon/markov/ddp.py @@ -456,6 +456,73 @@ def _check_action_feasibility(self): 'violated for state {s}'.format(s=s) ) + def to_sa_pair_form(self, sparse=True): + """ + Convert this instance of `DiscreteDP` to SA-pair form + + Parameters + ---------- + sparse : bool, optional(default=True) + Should the `Q` matrix be stored as a sparse matrix? + If true the CSR format is used + + Returns + ------- + ddp_sa : DiscreteDP + The correspnoding DiscreteDP instance in SA-pair form + + Notes + ----- + If this instance is already in SA-pair form then it is returned + un-modified + """ + + if self._sa_pair: + return self + else: + s_ind, a_ind = np.where(self.R > - np.inf) + RL = self.R[s_ind, a_ind] + if sparse: + QL = sp.csr_matrix(self.Q[s_ind, a_ind]) + else: + QL = self.Q[s_ind, a_ind] + return DiscreteDP(RL, QL, self.beta, s_ind, a_ind) + + def to_product_form(self): + """ + Convert this instance of `DiscreteDP` to the "product" form. + + The product form uses the version of the init method taking + `R`, `Q` and `beta`. + + Parameters + ---------- + + Returns + ------- + ddp_sa : DiscreteDP + The correspnoding DiscreteDP instance in product form + + Notes + ----- + If this instance is already in product form then it is returned + un-modified + + """ + if self._sa_pair: + ns = self.num_states + na = self.a_indices.max() + 1 + R = np.full((ns, na), -np.inf) + R[self.s_indices, self.a_indices] = self.R + Q = np.zeros((ns, na, ns)) + if self._sparse: + _fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q) + else: + _fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q) + return DiscreteDP(R, Q, self.beta) + else: + return self + def RQ_sigma(self, sigma): """ Given a policy `sigma`, return the reward vector `R_sigma` and @@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None): return vs, sigmas +@jit(nopython=True) +def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out): + L = Q_in.shape[0] + for i in range(L): + Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :] + + return Q_out + + @jit(nopython=True) def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax): n = len(out_max) diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py index 3a62e43a1..dfa1991e3 100644 --- a/quantecon/markov/tests/test_ddp.py +++ b/quantecon/markov/tests/test_ddp.py @@ -10,7 +10,7 @@ import numpy as np import scipy.sparse as sparse from numpy.testing import assert_array_equal, assert_allclose, assert_raises -from nose.tools import eq_, ok_ +from nose.tools import ok_ from quantecon.markov import DiscreteDP, backward_induction @@ -126,10 +126,7 @@ def test_ddp_beta_0(): v_init = [0, 0, 0] ddp0 = DiscreteDP(R, Q, beta) - s_indices, a_indices = np.where(R > -np.inf) - R_sa = R[s_indices, a_indices] - Q_sa = Q[s_indices, a_indices] - ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices) + ddp1 = ddp0.to_sa_pair_form() methods = ['vi', 'pi', 'mpi'] for ddp in [ddp0, ddp1]: @@ -140,7 +137,6 @@ def test_ddp_beta_0(): def test_ddp_sorting(): - n, m = 2, 2 beta = 0.95 # Sorted @@ -238,8 +234,6 @@ def test_ddp_negative_inf_error(): def test_ddp_no_feasibile_action_error(): - n, m = 3, 2 - # No action is feasible at state 1 s_indices = [0, 0, 2, 2] a_indices = [0, 1, 0, 1] @@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error(): beta = 1 ddp0 = DiscreteDP(R, Q, beta) - s_indices, a_indices = np.where(R > -np.inf) - R_sa = R[s_indices, a_indices] - Q_sa = Q[s_indices, a_indices] - ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices) - Q_sa_sp = sparse.csr_matrix(Q_sa) - ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices) + ddp1 = ddp0.to_sa_pair_form() + ddp2 = ddp0.to_sa_pair_form(sparse=False) solution_methods = \ ['value_iteration', 'policy_iteration', 'modified_policy_iteration'] @@ -274,6 +264,62 @@ def test_ddp_beta_1_not_implemented_error(): assert_raises(NotImplementedError, getattr(ddp, method)) +def test_ddp_to_sa_and_to_product(): + n, m = 3, 2 + R = np.array([[0, 1], [1, 0], [-np.inf, 1]]) + Q = np.empty((n, m, n)) + Q[:] = 1/n + Q[0, 0, 0] = 0 + Q[0, 0, 1] = 2/n + beta = 0.95 + + sparse_R = np.array([0, 1, 1, 0, 1]) + _Q = np.full((5, 3), 1/3) + _Q[0, 0] = 0 + _Q[0, 1] = 2/n + sparse_Q = sparse.coo_matrix(_Q) + + ddp = DiscreteDP(R, Q, beta) + ddp_sa = ddp.to_sa_pair_form() + ddp_sa2 = ddp_sa.to_sa_pair_form() + ddp_sa3 = ddp.to_sa_pair_form(sparse=False) + ddp2 = ddp_sa.to_product_form() + ddp3 = ddp_sa2.to_product_form() + ddp4 = ddp.to_product_form() + + # make sure conversion worked + for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]: + assert_allclose(ddp_s.R, sparse_R) + # allcose doesn't work on sparse + np.max(np.abs((sparse_Q - ddp_s.Q))) < 1e-15 + assert_allclose(ddp_s.beta, beta) + + # these two will have probability 0 in state 2, action 0 b/c + # of the infeasiability in R + funky_Q = np.empty((n, m, n)) + funky_Q[:] = 1/n + funky_Q[0, 0, 0] = 0 + funky_Q[0, 0, 1] = 2/n + funky_Q[2, 0, :] = 0 + for ddp_f in [ddp2, ddp3]: + assert_allclose(ddp_f.R, ddp.R) + assert_allclose(ddp_f.Q, funky_Q) + assert_allclose(ddp_f.beta, ddp.beta) + + # this one is just the original one. + assert_allclose(ddp4.R, ddp.R) + assert_allclose(ddp4.Q, ddp.Q) + assert_allclose(ddp4.beta, ddp.beta) + + for method in ["pi", "vi", "mpi"]: + sol1 = ddp.solve(method=method) + for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]: + sol2 = ddp_other.solve(method=method) + + for k in ["v", "sigma", "num_iter"]: + assert_allclose(sol1[k], sol2[k]) + + if __name__ == '__main__': import sys import nose