
Changed Categorical to work with multidim p at the logp level #3383

Merged · 3 commits · Feb 26, 2019

Changes from 2 commits
3 changes: 3 additions & 0 deletions RELEASE-NOTES.md
@@ -23,6 +23,9 @@
 - Fixed incorrect usage of `broadcast_distribution_samples` in `DiscreteWeibull`.
 - `Mixture`'s default dtype is now determined by `theano.config.floatX`.
 - `dist_math.random_choice` now handles nd-arrays of category probabilities, and also handles sizes that are not `None`. Also removed unused `k` kwarg from `dist_math.random_choice`.
+- Changed `Categorical.mode` to preserve all the dimensions of `p` except the last one, which encodes each category's probability.
+- Changed the initialization of `Categorical.p`. `p` is now normalized to sum to `1` inside `logp` and `random`, but not during initialization. Normalizing during initialization could hide negative values supplied to `p`, as mentioned in #2082.
+- To be able to detect negative `p` values supplied to `Categorical`, `Categorical.logp` now checks that `sum(self.p, axis=-1) == 1` only when `self.p` is not a `Number`, `np.ndarray`, `TensorConstant` or `SharedVariable`; those cases are normalized automatically to sum to `1`. The remaining case typically originates from a `step_method` proposal, where the value of the `self.p` tensor is set externally but must nevertheless sum to 1. This may break old code that initialized `p` with a theano expression and relied on the automatic normalization to make it sum to 1. `Categorical.logp` now also checks that the values of `p` are no greater than 1.

 ### Deprecations

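To make the second and third release notes above concrete, here is a small numpy sketch (an illustration added here, not part of the diff) of how normalizing `p` at initialization could hide invalid input. The probability vector is the hard edge case from #2082 that `test_categorical_valid_p` exercises further down.

```python
import numpy as np

# The hard edge case from #2082: every entry of p is negative or zero.
p = np.array([-1., -1., 0., 0.])

# Old behaviour: normalize p as soon as the distribution is created.
# Dividing by the (negative) sum turns the invalid vector into one that
# looks like a perfectly valid probability vector.
p_old = p / p.sum(axis=-1, keepdims=True)
assert np.isclose(p_old.sum(), 1.0) and not np.any(p_old < 0)

# New behaviour: keep the raw p and let logp check it, so the negative
# entries are caught and the log-probability is bound to -inf.
assert not np.all(p > 0)
```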
36 changes: 28 additions & 8 deletions pymc3/distributions/discrete.py
@@ -1,3 +1,4 @@
+import numbers
 import numpy as np
 import theano
 import theano.tensor as tt
@@ -710,11 +711,17 @@ def __init__(self, p, *args, **kwargs):
         except AttributeError:
             self.k = tt.shape(p)[-1]
         p = tt.as_tensor_variable(floatX(p))
-        self.p = (p.T / tt.sum(p, -1)).T
-        self.mode = tt.argmax(p)
+
+        # From #2082, it may be dangerous to automatically rescale p at this
+        # point without checking for positiveness
+        self.p = p
+        self.mode = tt.argmax(p, axis=-1)
+        if self.mode.ndim == 1:
+            self.mode = tt.squeeze(self.mode)

     def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
+        p = p / np.sum(p, axis=-1, keepdims=True)

         return generate_samples(random_choice,
                                 p=p,
@@ -723,21 +730,34 @@ def random(self, point=None, size=None):
                                 size=size)

     def logp(self, value):
-        p = self.p
+        p_ = self.p
         k = self.k

         # Clip values before using them for indexing
         value_clip = tt.clip(value, 0, k - 1)

-        sumto1 = theano.gradient.zero_grad(
-            tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))
+        # We must only check that the values sum to 1 if p comes from a
+        # tensor variable, i.e. when p is a step_method proposal. In the other
+        # cases we normalize ourselves
+        if not isinstance(p_, (numbers.Number,
+                               np.ndarray,
+                               tt.TensorConstant,
+                               tt.sharedvar.SharedVariable)):
+            sumto1 = theano.gradient.zero_grad(
+                tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
+            p = p_
+        else:
+            p = p_ / tt.sum(p_, axis=-1, keepdims=True)
+            sumto1 = True

         if p.ndim > 1:
-            a = tt.log(p[tt.arange(p.shape[0]), value_clip])
+            # Fancy trick to index the last axis without touching the other
+            a = tt.log((p.T)[value_clip].T)
Inline review comments on the line above:

Member: There's got to be a better way than doing two transposes, right? If it is really necessary, maybe it is easier to do np.swapaxes or np.moveaxis?

Contributor (author): Yup, np.moveaxis is the best option. I was not aware that it worked well with theano tensors too.

Member: Actually I didn't realize it works with np.moveaxis as well. So it works with theano tensors too, I take it. Would it be better to use tt.dimshuffle? http://deeplearning.net/software/theano/library/tensor/basic.html#dimshuffle
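For readers skimming the thread, here is a plain-numpy sketch (an illustration added here, not part of the diff) of the equivalence being discussed. With an integer category index, the double-transpose trick, `np.moveaxis`, and plain last-axis indexing all pick out the same entries:

```python
import numpy as np

rng = np.random.RandomState(0)
p = rng.dirichlet(np.ones(5), size=(3, 7))   # batch of probability vectors, shape (3, 7, 5)
idx = 2                                      # an integer category index into the last axis

a = (p.T)[idx].T                 # the "fancy trick" used in logp above
b = np.moveaxis(p, -1, 0)[idx]   # the reviewers' suggestion
c = p[..., idx]                  # plain last-axis indexing

assert a.shape == b.shape == c.shape == (3, 7)
assert np.allclose(a, b) and np.allclose(b, c)
```

The thread also notes that the same numpy functions accept theano tensors, which is why `np.moveaxis` is a viable alternative inside `logp`.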

         else:
             a = tt.log(p[value_clip])

-        return bound(a, value >= 0, value <= (k - 1), sumto1)
+        return bound(a, value >= 0, value <= (k - 1), sumto1,
+                     tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -1177,7 +1197,7 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
             tt.zeros_like(tt.shape_padright(pa[:, 0])),
             pa,
             tt.ones_like(tt.shape_padright(pa[:, 0]))
-        ], axis=1)
+        ], axis=-1)
         p = p_cum[:, 1:] - p_cum[:, :-1]

         super().__init__(p=p, *args, **kwargs)
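As a side note on the `axis=-1` hunk just above, here is a small numpy sketch (illustration only; treating `pa` as the per-observation cumulative probabilities `invlogit(cutpoints - eta)` is an assumption, since its definition is not shown in the hunk) of how padding the cumulative probabilities with 0 and 1 along the last axis and differencing yields per-category probabilities that sum to 1:

```python
import numpy as np

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

eta = np.array([0.5, -1.0])              # one latent value per observation
cutpoints = np.array([-1.0, 0.0, 1.0])   # K - 1 = 3 cutpoints -> K = 4 categories

# Assumed stand-in for `pa`: cumulative probabilities per observation, shape (2, 3).
pa = invlogit(cutpoints - eta[:, None])

# Mirror the hunk: pad with 0 and 1 along the last axis, then difference.
p_cum = np.concatenate([np.zeros_like(pa[:, :1]),
                        pa,
                        np.ones_like(pa[:, :1])], axis=-1)
p = p_cum[:, 1:] - p_cum[:, :-1]         # per-category probabilities, shape (2, 4)

assert p.shape == (2, 4)
assert np.allclose(p.sum(axis=-1), 1.0)
assert np.all(p >= 0)                    # differences of a non-decreasing sequence
```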
9 changes: 8 additions & 1 deletion pymc3/tests/test_distribution_defaults.py
@@ -1,5 +1,5 @@
 from ..model import Model
-from ..distributions import DiscreteUniform, Continuous
+from ..distributions import DiscreteUniform, Continuous, Categorical

 import numpy as np
 import pytest
@@ -67,3 +67,10 @@ def test_discrete_uniform_negative():
     with model:
         x = DiscreteUniform('x', lower=-10, upper=0)
     assert model.test_point['x'] == -5
+
+
+def test_categorical_mode():
+    model = Model()
+    with model:
+        x = Categorical('x', p=np.eye(4), shape=4)
+    assert np.allclose(model.test_point['x'], np.arange(4))
24 changes: 21 additions & 3 deletions pymc3/tests/test_distributions.py
@@ -321,7 +321,7 @@ def dirichlet_logpdf(value, a):

 def categorical_logpdf(value, p):
     if value >= 0 and value <= len(p):
-        return floatX(np.log(p[value]))
+        return floatX(np.log((p.T)[value]).T)
     else:
         return -inf

@@ -346,8 +346,10 @@ def invlogit(x, eps=sys.float_info.epsilon):

 def orderedlogistic_logpdf(value, eta, cutpoints):
     c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
-    p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1])
-    return np.log(p)
+    ps = np.array([invlogit(eta - cc) - invlogit(eta - cc1)
+                   for cc, cc1 in zip(c[:-1], c[1:])])
+    p = ps[value]
+    return np.where(np.all(ps > 0), np.log(p), -np.inf)

 class Simplex:
     def __init__(self, n):
@@ -1079,6 +1081,22 @@ def test_categorical_bounds(self):
         assert np.isinf(x.logp({'x': -1}))
         assert np.isinf(x.logp({'x': 3}))

+    def test_categorical_valid_p(self):
+        with Model():
+            x = Categorical('x', p=np.array([-0.2, 0.3, 0.5]))
+            assert np.isinf(x.logp({'x': 0}))
+            assert np.isinf(x.logp({'x': 1}))
+        with Model():
+            # Hard edge case from #2082: normalizing p early would hide the
+            # negative entries whenever the remaining entries are zero, e.g.
+            # a single negative value, or a pair of them, with the rest zeros
+            x = Categorical('x', p=np.array([-1, -1, 0, 0]))
+            assert np.isinf(x.logp({'x': 0}))
+            assert np.isinf(x.logp({'x': 1}))
+            assert np.isinf(x.logp({'x': 2}))
+            assert np.isinf(x.logp({'x': 3}))
+
     @pytest.mark.parametrize('n', [2, 3, 4])
     def test_categorical(self, n):
         self.pymc3_matches_scipy(Categorical, Domain(range(n), 'int64'), {'p': Simplex(n)},
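Why the reference `orderedlogistic_logpdf` above now computes all bin probabilities: when a proposed set of cutpoints is not sorted, some of the `invlogit` differences come out negative, and the helper returns `-inf` in that case, presumably so that the reference density agrees with the stricter `bound` checks added to `Categorical.logp`. A small numpy sketch of that situation (illustration only, using a simplified `invlogit` without the epsilon guard):

```python
import numpy as np

def invlogit(x):
    # simplified stand-in for the test helper's invlogit
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-x))

eta = 0.0
cutpoints = np.array([1.0, -1.0])        # deliberately unsorted cutpoints
c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
ps = np.array([invlogit(eta - lo) - invlogit(eta - hi)
               for lo, hi in zip(c[:-1], c[1:])])

assert np.any(ps < 0)   # one "probability" is negative, so the reference logpdf returns -inf
```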
3 changes: 3 additions & 0 deletions pymc3/tests/test_distributions_random.py
@@ -444,6 +444,9 @@ def test_probability_vector_shape(self):
         p = np.ones((10, 5))
         assert pm.Categorical.dist(p=p).random().shape == (10,)
         assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 10)
+        p = np.ones((3, 7, 5))
+        assert pm.Categorical.dist(p=p).random().shape == (3, 7)
+        assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 3, 7)


 class TestScalarParameterSamples(SeededTest):
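To make the new shape expectations concrete, here is a rough, self-contained numpy sketch of one way to draw per-row categories from an nd-array of probabilities (an illustration of the behaviour being tested, not the actual `dist_math.random_choice` implementation):

```python
import numpy as np

def sample_categorical_nd(p, size=None, rng=np.random):
    """Draw categories from probability vectors stored along the last axis of p."""
    p = p / p.sum(axis=-1, keepdims=True)            # Categorical.random normalizes like this
    batch_shape, k = p.shape[:-1], p.shape[-1]
    draws = 1 if size is None else int(np.prod(size))
    flat = np.stack([rng.choice(k, size=draws, p=row)
                     for row in p.reshape(-1, k)], axis=-1)   # (draws, n_batch_rows)
    out = flat.reshape((draws,) + batch_shape)
    if size is None:
        return out[0]                                # shape == batch_shape
    return out.reshape(tuple(np.atleast_1d(size)) + batch_shape)

p = np.ones((3, 7, 5))
assert sample_categorical_nd(p).shape == (3, 7)
assert sample_categorical_nd(p, size=4).shape == (4, 3, 7)
```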