Skip to content

Commit

Permalink
ENH: added routines to convert ddp between full and SA formulations (#…
Browse files Browse the repository at this point in the history
…318)

* ENH: added routines to convert ddp between full and SA formulations

* CLN: fix naming on ddp conversion routines

* TEST: test corner cases in ddp conversion routines
  • Loading branch information
sglyon authored and mmcky committed Aug 4, 2017
1 parent 08e634a commit c1a5521
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 14 deletions.
76 changes: 76 additions & 0 deletions quantecon/markov/ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,73 @@ def _check_action_feasibility(self):
'violated for state {s}'.format(s=s)
)

def to_sa_pair_form(self, sparse=True):
"""
Convert this instance of `DiscreteDP` to SA-pair form
Parameters
----------
sparse : bool, optional(default=True)
Should the `Q` matrix be stored as a sparse matrix?
If true the CSR format is used
Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in SA-pair form
Notes
-----
If this instance is already in SA-pair form then it is returned
un-modified
"""

if self._sa_pair:
return self
else:
s_ind, a_ind = np.where(self.R > - np.inf)
RL = self.R[s_ind, a_ind]
if sparse:
QL = sp.csr_matrix(self.Q[s_ind, a_ind])
else:
QL = self.Q[s_ind, a_ind]
return DiscreteDP(RL, QL, self.beta, s_ind, a_ind)

def to_product_form(self):
"""
Convert this instance of `DiscreteDP` to the "product" form.
The product form uses the version of the init method taking
`R`, `Q` and `beta`.
Parameters
----------
Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in product form
Notes
-----
If this instance is already in product form then it is returned
un-modified
"""
if self._sa_pair:
ns = self.num_states
na = self.a_indices.max() + 1
R = np.full((ns, na), -np.inf)
R[self.s_indices, self.a_indices] = self.R
Q = np.zeros((ns, na, ns))
if self._sparse:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q)
else:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q)
return DiscreteDP(R, Q, self.beta)
else:
return self

def RQ_sigma(self, sigma):
"""
Given a policy `sigma`, return the reward vector `R_sigma` and
Expand Down Expand Up @@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None):
return vs, sigmas


@jit(nopython=True)
def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out):
L = Q_in.shape[0]
for i in range(L):
Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :]

return Q_out


@jit(nopython=True)
def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
n = len(out_max)
Expand Down
74 changes: 60 additions & 14 deletions quantecon/markov/tests/test_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import scipy.sparse as sparse
from numpy.testing import assert_array_equal, assert_allclose, assert_raises
from nose.tools import eq_, ok_
from nose.tools import ok_

from quantecon.markov import DiscreteDP, backward_induction

Expand Down Expand Up @@ -126,10 +126,7 @@ def test_ddp_beta_0():
v_init = [0, 0, 0]

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_pair_form()
methods = ['vi', 'pi', 'mpi']

for ddp in [ddp0, ddp1]:
Expand All @@ -140,7 +137,6 @@ def test_ddp_beta_0():


def test_ddp_sorting():
n, m = 2, 2
beta = 0.95

# Sorted
Expand Down Expand Up @@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():


def test_ddp_no_feasibile_action_error():
n, m = 3, 2

# No action is feasible at state 1
s_indices = [0, 0, 2, 2]
a_indices = [0, 1, 0, 1]
Expand All @@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
beta = 1

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
Q_sa_sp = sparse.csr_matrix(Q_sa)
ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_pair_form()
ddp2 = ddp0.to_sa_pair_form(sparse=False)

solution_methods = \
['value_iteration', 'policy_iteration', 'modified_policy_iteration']
Expand All @@ -274,6 +264,62 @@ def test_ddp_beta_1_not_implemented_error():
assert_raises(NotImplementedError, getattr(ddp, method))


def test_ddp_to_sa_and_to_product():
n, m = 3, 2
R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
Q = np.empty((n, m, n))
Q[:] = 1/n
Q[0, 0, 0] = 0
Q[0, 0, 1] = 2/n
beta = 0.95

sparse_R = np.array([0, 1, 1, 0, 1])
_Q = np.full((5, 3), 1/3)
_Q[0, 0] = 0
_Q[0, 1] = 2/n
sparse_Q = sparse.coo_matrix(_Q)

ddp = DiscreteDP(R, Q, beta)
ddp_sa = ddp.to_sa_pair_form()
ddp_sa2 = ddp_sa.to_sa_pair_form()
ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
ddp2 = ddp_sa.to_product_form()
ddp3 = ddp_sa2.to_product_form()
ddp4 = ddp.to_product_form()

# make sure conversion worked
for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
assert_allclose(ddp_s.R, sparse_R)
# allcose doesn't work on sparse
np.max(np.abs((sparse_Q - ddp_s.Q))) < 1e-15
assert_allclose(ddp_s.beta, beta)

# these two will have probability 0 in state 2, action 0 b/c
# of the infeasiability in R
funky_Q = np.empty((n, m, n))
funky_Q[:] = 1/n
funky_Q[0, 0, 0] = 0
funky_Q[0, 0, 1] = 2/n
funky_Q[2, 0, :] = 0
for ddp_f in [ddp2, ddp3]:
assert_allclose(ddp_f.R, ddp.R)
assert_allclose(ddp_f.Q, funky_Q)
assert_allclose(ddp_f.beta, ddp.beta)

# this one is just the original one.
assert_allclose(ddp4.R, ddp.R)
assert_allclose(ddp4.Q, ddp.Q)
assert_allclose(ddp4.beta, ddp.beta)

for method in ["pi", "vi", "mpi"]:
sol1 = ddp.solve(method=method)
for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
sol2 = ddp_other.solve(method=method)

for k in ["v", "sigma", "num_iter"]:
assert_allclose(sol1[k], sol2[k])


if __name__ == '__main__':
import sys
import nose
Expand Down

0 comments on commit c1a5521

Please sign in to comment.