ENH: added routines to convert ddp between full and SA formulations (#…

…318) * ENH: added routines to convert ddp between full and SA formulations * CLN: fix naming on ddp conversion routines * TEST: test corner cases in ddp conversion routines
QuantEcon · Aug 4, 2017 · c1a5521 · c1a5521
1 parent 08e634a
commit c1a5521
Show file tree

Hide file tree

Showing 2 changed files with 136 additions and 14 deletions.
diff --git a/quantecon/markov/ddp.py b/quantecon/markov/ddp.py
@@ -456,6 +456,73 @@ def _check_action_feasibility(self):
                     'violated for state {s}'.format(s=s)
                 )
 
+    def to_sa_pair_form(self, sparse=True):
+        """
+        Convert this instance of `DiscreteDP` to SA-pair form
+
+        Parameters
+        ----------
+        sparse : bool, optional(default=True)
+            Should the `Q` matrix be stored as a sparse matrix?
+            If true the CSR format is used
+
+        Returns
+        -------
+        ddp_sa : DiscreteDP
+            The correspnoding DiscreteDP instance in SA-pair form
+
+        Notes
+        -----
+        If this instance is already in SA-pair form then it is returned
+        un-modified
+        """
+
+        if self._sa_pair:
+            return self
+        else:
+            s_ind, a_ind = np.where(self.R > - np.inf)
+            RL = self.R[s_ind, a_ind]
+            if sparse:
+                QL = sp.csr_matrix(self.Q[s_ind, a_ind])
+            else:
+                QL = self.Q[s_ind, a_ind]
+            return DiscreteDP(RL, QL, self.beta, s_ind, a_ind)
+
+    def to_product_form(self):
+        """
+        Convert this instance of `DiscreteDP` to the "product" form.
+
+        The product form uses the version of the init method taking
+        `R`, `Q` and `beta`.
+
+        Parameters
+        ----------
+
+        Returns
+        -------
+        ddp_sa : DiscreteDP
+            The correspnoding DiscreteDP instance in product form
+
+        Notes
+        -----
+        If this instance is already in product form then it is returned
+        un-modified
+
+        """
+        if self._sa_pair:
+            ns = self.num_states
+            na = self.a_indices.max() + 1
+            R = np.full((ns, na), -np.inf)
+            R[self.s_indices, self.a_indices] = self.R
+            Q = np.zeros((ns, na, ns))
+            if self._sparse:
+                _fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q)
+            else:
+                _fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q)
+            return DiscreteDP(R, Q, self.beta)
+        else:
+            return self
+
     def RQ_sigma(self, sigma):
         """
         Given a policy `sigma`, return the reward vector `R_sigma` and
@@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None):
     return vs, sigmas
 
 
+@jit(nopython=True)
+def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out):
+    L = Q_in.shape[0]
+    for i in range(L):
+        Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :]
+
+    return Q_out
+
+
 @jit(nopython=True)
 def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
     n = len(out_max)

diff --git a/quantecon/markov/tests/test_ddp.py b/quantecon/markov/tests/test_ddp.py
@@ -10,7 +10,7 @@
 import numpy as np
 import scipy.sparse as sparse
 from numpy.testing import assert_array_equal, assert_allclose, assert_raises
-from nose.tools import eq_, ok_
+from nose.tools import ok_
 
 from quantecon.markov import DiscreteDP, backward_induction
 
@@ -126,10 +126,7 @@ def test_ddp_beta_0():
     v_init = [0, 0, 0]
 
     ddp0 = DiscreteDP(R, Q, beta)
-    s_indices, a_indices = np.where(R > -np.inf)
-    R_sa = R[s_indices, a_indices]
-    Q_sa = Q[s_indices, a_indices]
-    ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
+    ddp1 = ddp0.to_sa_pair_form()
     methods = ['vi', 'pi', 'mpi']
 
     for ddp in [ddp0, ddp1]:
@@ -140,7 +137,6 @@ def test_ddp_beta_0():
 
 
 def test_ddp_sorting():
-    n, m = 2, 2
     beta = 0.95
 
     # Sorted
@@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():
 
 
 def test_ddp_no_feasibile_action_error():
-    n, m = 3, 2
-
     # No action is feasible at state 1
     s_indices = [0, 0, 2, 2]
     a_indices = [0, 1, 0, 1]
@@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
     beta = 1
 
     ddp0 = DiscreteDP(R, Q, beta)
-    s_indices, a_indices = np.where(R > -np.inf)
-    R_sa = R[s_indices, a_indices]
-    Q_sa = Q[s_indices, a_indices]
-    ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
-    Q_sa_sp = sparse.csr_matrix(Q_sa)
-    ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
+    ddp1 = ddp0.to_sa_pair_form()
+    ddp2 = ddp0.to_sa_pair_form(sparse=False)
 
     solution_methods = \
         ['value_iteration', 'policy_iteration', 'modified_policy_iteration']
@@ -274,6 +264,62 @@ def test_ddp_beta_1_not_implemented_error():
             assert_raises(NotImplementedError, getattr(ddp, method))
 
 
+def test_ddp_to_sa_and_to_product():
+    n, m = 3, 2
+    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
+    Q = np.empty((n, m, n))
+    Q[:] = 1/n
+    Q[0, 0, 0] = 0
+    Q[0, 0, 1] = 2/n
+    beta = 0.95
+
+    sparse_R = np.array([0, 1, 1, 0, 1])
+    _Q = np.full((5, 3), 1/3)
+    _Q[0, 0] = 0
+    _Q[0, 1] = 2/n
+    sparse_Q = sparse.coo_matrix(_Q)
+
+    ddp = DiscreteDP(R, Q, beta)
+    ddp_sa = ddp.to_sa_pair_form()
+    ddp_sa2 = ddp_sa.to_sa_pair_form()
+    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
+    ddp2 = ddp_sa.to_product_form()
+    ddp3 = ddp_sa2.to_product_form()
+    ddp4 = ddp.to_product_form()
+
+    # make sure conversion worked
+    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
+        assert_allclose(ddp_s.R, sparse_R)
+        # allcose doesn't work on sparse
+        np.max(np.abs((sparse_Q - ddp_s.Q))) < 1e-15
+        assert_allclose(ddp_s.beta, beta)
+
+    # these two will have probability 0 in state 2, action 0 b/c
+    # of the infeasiability in R
+    funky_Q = np.empty((n, m, n))
+    funky_Q[:] = 1/n
+    funky_Q[0, 0, 0] = 0
+    funky_Q[0, 0, 1] = 2/n
+    funky_Q[2, 0, :] = 0
+    for ddp_f in [ddp2, ddp3]:
+        assert_allclose(ddp_f.R, ddp.R)
+        assert_allclose(ddp_f.Q, funky_Q)
+        assert_allclose(ddp_f.beta, ddp.beta)
+
+    # this one is just the original one.
+    assert_allclose(ddp4.R, ddp.R)
+    assert_allclose(ddp4.Q, ddp.Q)
+    assert_allclose(ddp4.beta, ddp.beta)
+
+    for method in ["pi", "vi", "mpi"]:
+        sol1 = ddp.solve(method=method)
+        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
+            sol2 = ddp_other.solve(method=method)
+
+            for k in ["v", "sigma", "num_iter"]:
+                assert_allclose(sol1[k], sol2[k])
+
+
 if __name__ == '__main__':
     import sys
     import nose