Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update model_comp examples and ppc_w to work with idata #4042

Merged
merged 6 commits into from
Aug 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
### Maintenance
- Mentioned the way to do any random walk with `theano.tensor.cumsum()` in `GaussianRandomWalk` docstrings (see [#4048](https://github.com/pymc-devs/pymc3/pull/4048)).


### Documentation

### New features
- `sample_posterior_predictive_w` can now feed on `xarray.Dataset` - e.g. from `InferenceData.posterior`. (see [#4042](https://github.com/pymc-devs/pymc3/pull/4042))


## PyMC3 3.9.3 (11 August 2020)

Expand All @@ -27,6 +30,7 @@

_NB: The `docs/*` folder is still removed from the tarball due to an upload size limit on PyPI._


## PyMC3 3.9.2 (24 June 2020)
### Maintenance
- Warning added in GP module when `input_dim` is lower than the number of columns in `X` to compute the covariance function (see [#3974](https://github.com/pymc-devs/pymc3/pull/3974)).
Expand Down
322 changes: 234 additions & 88 deletions docs/source/notebooks/model_averaging.ipynb

Large diffs are not rendered by default.

504 changes: 217 additions & 287 deletions docs/source/notebooks/model_comparison.ipynb

Large diffs are not rendered by default.

109 changes: 35 additions & 74 deletions pymc3/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,7 @@ def _print_step_hierarchy(s, level=0):
else:
varnames = ", ".join(
[
get_untransformed_name(v.name)
if is_transformed_name(v.name)
else v.name
get_untransformed_name(v.name) if is_transformed_name(v.name) else v.name
for v in s.vars
]
)
Expand Down Expand Up @@ -491,10 +489,7 @@ def sample(
start = start_
except (AttributeError, NotImplementedError, tg.NullTypeGradError):
# gradient computation failed
_log.info(
"Initializing NUTS failed. "
"Falling back to elementwise auto-assignment."
)
_log.info("Initializing NUTS failed. " "Falling back to elementwise auto-assignment.")
_log.debug("Exception in init nuts", exc_info=True)
step = assign_step_methods(model, step, step_kwargs=kwargs)
else:
Expand Down Expand Up @@ -559,9 +554,7 @@ def sample(
has_demcmc = np.any(
[
isinstance(m, DEMetropolis)
for m in (
step.methods if isinstance(step, CompoundStep) else [step]
)
for m in (step.methods if isinstance(step, CompoundStep) else [step])
]
)
_log.info("Population sampling ({} chains)".format(chains))
Expand Down Expand Up @@ -625,9 +618,7 @@ def sample(

if compute_convergence_checks:
if draws - tune < 100:
warnings.warn(
"The number of samples is too small to check convergence reliably."
)
warnings.warn("The number of samples is too small to check convergence reliably.")
else:
trace.report._run_convergence_checks(idata, model)
trace.report._log_summary()
Expand Down Expand Up @@ -664,14 +655,7 @@ def _check_start_shape(model, start):


def _sample_many(
draws,
chain: int,
chains: int,
start: list,
random_seed: list,
step,
callback=None,
**kwargs,
draws, chain: int, chains: int, start: list, random_seed: list, step, callback=None, **kwargs,
):
"""Samples all chains sequentially.

Expand Down Expand Up @@ -833,9 +817,7 @@ def _sample(
"""
skip_first = kwargs.get("skip_first", 0)

sampling = _iter_sample(
draws, step, start, trace, chain, tune, model, random_seed, callback
)
sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed, callback)
_pbar_data = {"chain": chain, "divergences": 0}
_desc = "Sampling chain {chain:d}, {divergences:,d} divergences"
if progressbar:
Expand Down Expand Up @@ -909,9 +891,7 @@ def iter_sample(
for trace in iter_sample(500, step):
...
"""
sampling = _iter_sample(
draws, step, start, trace, chain, tune, model, random_seed, callback
)
sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed, callback)
for i, (strace, _) in enumerate(sampling):
yield MultiTrace([strace[: i + 1]])

Expand Down Expand Up @@ -1012,8 +992,7 @@ def _iter_sample(
if callback is not None:
warns = getattr(step, "warnings", None)
callback(
trace=strace,
draw=Draw(chain, i == draws, i, i < tune, stats, point, warns),
trace=strace, draw=Draw(chain, i == draws, i, i < tune, stats, point, warns),
)

yield strace, diverging
Expand Down Expand Up @@ -1067,9 +1046,7 @@ def __init__(self, steppers, parallelize, progressbar=True):
import multiprocessing

for c, stepper in (
enumerate(progress_bar(steppers))
if progressbar
else enumerate(steppers)
enumerate(progress_bar(steppers)) if progressbar else enumerate(steppers)
):
secondary_end, primary_end = multiprocessing.Pipe()
stepper_dumps = pickle.dumps(stepper, protocol=4)
Expand Down Expand Up @@ -1136,9 +1113,7 @@ def _run_secondary(c, stepper_dumps, secondary_end):
# but rather a CompoundStep. PopulationArrayStepShared.population
# has to be updated, therefore we identify the substeppers first.
population_steppers = []
for sm in (
stepper.methods if isinstance(stepper, CompoundStep) else [stepper]
):
for sm in stepper.methods if isinstance(stepper, CompoundStep) else [stepper]:
if isinstance(sm, arraystep.PopulationArrayStepShared):
population_steppers.append(sm)
while True:
Expand Down Expand Up @@ -1682,13 +1657,9 @@ def sample_posterior_predictive(
nchain = 1

if keep_size and samples is not None:
raise IncorrectArgumentsError(
"Should not specify both keep_size and samples arguments"
)
raise IncorrectArgumentsError("Should not specify both keep_size and samples arguments")
if keep_size and size is not None:
raise IncorrectArgumentsError(
"Should not specify both keep_size and size arguments"
)
raise IncorrectArgumentsError("Should not specify both keep_size and size arguments")

if samples is None:
if isinstance(_trace, MultiTrace):
Expand All @@ -1714,15 +1685,11 @@ def sample_posterior_predictive(

if var_names is not None:
if vars is not None:
raise IncorrectArgumentsError(
"Should not specify both vars and var_names arguments."
)
raise IncorrectArgumentsError("Should not specify both vars and var_names arguments.")
else:
vars = [model[x] for x in var_names]
elif vars is not None: # var_names is None, and vars is not.
warnings.warn(
"vars argument is deprecated in favor of var_names.", DeprecationWarning
)
warnings.warn("vars argument is deprecated in favor of var_names.", DeprecationWarning)
if vars is None:
vars = model.observed_RVs

Expand All @@ -1741,11 +1708,7 @@ def sample_posterior_predictive(
# the trace object will either be a MultiTrace (and have _straces)...
if hasattr(_trace, "_straces"):
chain_idx, point_idx = np.divmod(idx, len_trace)
param = (
cast(MultiTrace, _trace)
._straces[chain_idx % nchain]
.point(point_idx)
)
param = cast(MultiTrace, _trace)._straces[chain_idx % nchain].point(point_idx)
# ... or a PointList
else:
param = cast(PointList, _trace)[idx % len_trace]
Expand Down Expand Up @@ -1783,9 +1746,9 @@ def sample_posterior_predictive_w(
Parameters
----------
traces : list or list of lists
List of traces generated from MCMC sampling, or a list of list
containing dicts from find_MAP() or points. The number of traces should
be equal to the number of weights.
List of traces generated from MCMC sampling (xarray.Dataset, arviz.InferenceData, or
MultiTrace), or a list of list containing dicts from find_MAP() or points. The number of
traces should be equal to the number of weights.
samples : int, optional
Number of posterior predictive samples to generate. Defaults to the
length of the shorter trace in traces.
Expand All @@ -1811,6 +1774,17 @@ def sample_posterior_predictive_w(
"""
np.random.seed(random_seed)

if isinstance(traces[0], InferenceData):
n_samples = [
trace.posterior.sizes["chain"] * trace.posterior.sizes["draw"] for trace in traces
]
traces = [dataset_to_point_dict(trace.posterior) for trace in traces]
elif isinstance(traces[0], xarray.Dataset):
n_samples = [trace.sizes["chain"] * trace.sizes["draw"] for trace in traces]
traces = [dataset_to_point_dict(trace) for trace in traces]
else:
n_samples = [len(i) * i.nchains for i in traces]

if models is None:
models = [modelcontext(models)] * len(traces)

Expand All @@ -1830,7 +1804,7 @@ def sample_posterior_predictive_w(
weights = np.asarray(weights)
p = weights / np.sum(weights)

min_tr = min([len(i) * i.nchains for i in traces])
min_tr = min(n_samples)

n = (min_tr * p).astype("int")
# ensure n sum up to min_tr
Expand Down Expand Up @@ -1933,18 +1907,15 @@ def sample_prior_predictive(
if vars is None and var_names is None:
prior_pred_vars = model.observed_RVs
prior_vars = (
get_default_varnames(model.unobserved_RVs, include_transformed=True)
+ model.potentials
get_default_varnames(model.unobserved_RVs, include_transformed=True) + model.potentials
)
vars_ = [var.name for var in prior_vars + prior_pred_vars]
vars = set(vars_)
elif vars is None:
vars = var_names
vars_ = vars
elif vars is not None:
warnings.warn(
"vars argument is deprecated in favor of var_names.", DeprecationWarning
)
warnings.warn("vars argument is deprecated in favor of var_names.", DeprecationWarning)
vars_ = vars
else:
raise ValueError("Cannot supply both vars and var_names arguments.")
Expand Down Expand Up @@ -1974,13 +1945,7 @@ def sample_prior_predictive(


def init_nuts(
init="auto",
chains=1,
n_init=500000,
model=None,
random_seed=None,
progressbar=True,
**kwargs,
init="auto", chains=1, n_init=500000, model=None, random_seed=None, progressbar=True, **kwargs,
):
"""Set up the mass matrix initialization for NUTS.

Expand Down Expand Up @@ -2036,9 +2001,7 @@ def init_nuts(
if set(vars) != set(model.vars):
raise ValueError("Must use init_nuts on all variables of a model.")
if not all_continuous(vars):
raise ValueError(
"init_nuts can only be used for models with only " "continuous variables."
)
raise ValueError("init_nuts can only be used for models with only " "continuous variables.")

if not isinstance(init, str):
raise TypeError("init must be a string.")
Expand Down Expand Up @@ -2092,9 +2055,7 @@ def init_nuts(
mean = approx.bij.rmap(approx.mean.get_value())
mean = model.dict_to_array(mean)
weight = 50
potential = quadpotential.QuadPotentialDiagAdaptGrad(
model.ndim, mean, cov, weight
)
potential = quadpotential.QuadPotentialDiagAdaptGrad(model.ndim, mean, cov, weight)
elif init == "advi+adapt_diag":
approx = pm.fit(
random_seed=random_seed,
Expand Down
12 changes: 7 additions & 5 deletions pymc3/tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,22 +718,24 @@ def test_sample_posterior_predictive_w(self):
mu = pm.Normal("mu", mu=0, sigma=1)
y = pm.Normal("y", mu=mu, sigma=1, observed=data0)
trace_0 = pm.sample()
idata_0 = az.from_pymc3(trace_0)

with pm.Model() as model_1:
mu = pm.Normal("mu", mu=0, sigma=1, shape=len(data0))
y = pm.Normal("y", mu=mu, sigma=1, observed=data0)
trace_1 = pm.sample()

traces = [trace_0, trace_0]
models = [model_0, model_0]
ppc = pm.sample_posterior_predictive_w(traces, 100, models)
assert ppc["y"].shape == (100, 500)
idata_1 = az.from_pymc3(trace_1)

traces = [trace_0, trace_1]
idatas = [idata_0, idata_1]
models = [model_0, model_1]

ppc = pm.sample_posterior_predictive_w(traces, 100, models)
assert ppc["y"].shape == (100, 500)

ppc = pm.sample_posterior_predictive_w(idatas, 100, models)
assert ppc["y"].shape == (100, 500)


@pytest.mark.parametrize(
"method",
Expand Down