Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update robust glm notebook #3908

Merged
merged 14 commits into from
May 4, 2020
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions .readthedocs.yml

This file was deleted.

4 changes: 3 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
### Maintenance
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
- Tuning results no longer leak into sequentially sampled `Metropolis` chains (see #3733 and #3796).
- Deprecated `sd` in version 3.7 has been replaced by `sigma` now raises `DepreciationWarning` on using `sd` in continuous, mixed and timeseries distributions. (see #3837 and #3688).
- Deprecated `sd` in version 3.7 has been replaced by `sigma` now raises `DeprecationWarning` on using `sd` in continuous, mixed and timeseries distributions. (see #3837 and #3688).
- We'll deprecate the `Text` and `SQLite` backends and the `save_trace`/`load_trace` functions, since this is now done with ArviZ. (see [#3902](https://github.com/pymc-devs/pymc3/pull/3902))
- In named models, `pm.Data` objects now get model-relative names (see [#3843](https://github.com/pymc-devs/pymc3/pull/3843)).
- `pm.sample` now takes 1000 draws and 1000 tuning samples by default, instead of 500 previously (see [#3855](https://github.com/pymc-devs/pymc3/pull/3855)).
- Dropped some deprecated kwargs and functions (see [#3906](https://github.com/pymc-devs/pymc3/pull/3906))
- Dropped the outdated 'nuts' initialization method for `pm.sample` (see [#3863](https://github.com/pymc-devs/pymc3/pull/3863)).
- Moved argument division out of `NegativeBinomial` `random` method. Fixes [#3864](https://github.com/pymc-devs/pymc3/issues/3864) in the style of [#3509](https://github.com/pymc-devs/pymc3/pull/3509).
- The Dirichlet distribution now raises a ValueError when it's initialized with <= 0 values (see [#3853](https://github.com/pymc-devs/pymc3/pull/3853)).
Expand Down
1,822 changes: 1,451 additions & 371 deletions docs/source/notebooks/GLM-robust-with-outlier-detection.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/notebooks/api_quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1284,7 +1284,7 @@
}
],
"source": [
"pm.gelman_rubin(trace)"
"pm.rhat(trace)"
]
},
{
Expand Down
6 changes: 3 additions & 3 deletions docs/source/notebooks/bayes_param_survival_pymc3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@
}
],
"source": [
"max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(weibull_trace).values())"
"max(np.max(gr_stats) for gr_stats in pm.rhat(weibull_trace).values())"
]
},
{
Expand Down Expand Up @@ -904,7 +904,7 @@
}
],
"source": [
"max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(log_logistic_trace).values())"
"max(np.max(gr_stats) for gr_stats in pm.rhat(log_logistic_trace).values())"
]
},
{
Expand Down Expand Up @@ -1030,4 +1030,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
4 changes: 2 additions & 2 deletions docs/source/notebooks/rugby_analytics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@
"outputs": [],
"source": [
"bfmi = np.max(pm.stats.bfmi(trace))\n",
"max_gr = max(np.max(gr_stats) for gr_stats in pm.stats.gelman_rubin(trace).values()).values"
"max_gr = max(np.max(gr_stats) for gr_stats in pm.stats.rhat(trace).values()).values"
]
},
{
Expand Down Expand Up @@ -1470,4 +1470,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
18 changes: 18 additions & 0 deletions pymc3/backends/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import os
import shutil
from typing import Optional, Dict, Any, List
import warnings

import numpy as np
from pymc3.backends import base
Expand Down Expand Up @@ -52,6 +53,12 @@ def save_trace(trace: MultiTrace, directory: Optional[str]=None, overwrite=False
-------
str, path to the directory where the trace was saved
"""
warnings.warn(
'The `save_trace` function will soon be removed.'
'Instead, use ArviZ to save/load traces.',
DeprecationWarning,
)

if directory is None:
directory = '.pymc_{}.trace'
idx = 1
Expand Down Expand Up @@ -89,6 +96,11 @@ def load_trace(directory: str, model=None) -> MultiTrace:
-------
pm.Multitrace that was saved in the directory
"""
warnings.warn(
'The `load_trace` function will soon be removed.'
'Instead, use ArviZ to save/load traces.',
DeprecationWarning,
)
straces = []
for subdir in glob.glob(os.path.join(directory, '*')):
if os.path.isdir(subdir):
Expand All @@ -106,6 +118,11 @@ class SerializeNDArray:

def __init__(self, directory: str):
"""Helper to save and load NDArray objects"""
warnings.warn(
'The `SerializeNDArray` class will soon be removed. '
'Instead, use ArviZ to save/load traces.',
DeprecationWarning,
)
self.directory = directory
self.metadata_path = os.path.join(self.directory, self.metadata_file)
self.samples_path = os.path.join(self.directory, self.samples_file)
Expand Down Expand Up @@ -367,6 +384,7 @@ def _slice_as_ndarray(strace, idx):

return sliced


def point_list_to_multitrace(point_list: List[Dict[str, np.ndarray]], model: Optional[Model]=None) -> MultiTrace:
'''transform point list into MultiTrace'''
_model = modelcontext(model)
Expand Down
13 changes: 13 additions & 0 deletions pymc3/backends/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"""
import numpy as np
import sqlite3
import warnings

from ..backends import base, ndarray
from . import tracetab as ttab
Expand Down Expand Up @@ -89,6 +90,12 @@ class SQLite(base.BaseTrace):
"""

def __init__(self, name, model=None, vars=None, test_point=None):
warnings.warn(
'The `SQLite` backend will soon be removed. '
'Please switch to a different backend. '
'If you have good reasons for using the SQLite backend, file an issue and tell us about them.',
DeprecationWarning,
)
super().__init__(name, model, vars, test_point)
self._var_cols = {}
self.var_inserts = {} # varname -> insert statement
Expand Down Expand Up @@ -322,6 +329,12 @@ def load(name, model=None):
-------
A MultiTrace instance
"""
warnings.warn(
'The `sqlite.load` function will soon be removed. '
'Please use ArviZ to save traces. '
'If you have good reasons for using the `load` function, file an issue and tell us about them. ',
DeprecationWarning,
)
db = _SQLiteDB(name)
db.connect()
varnames = _get_table_list(db.cursor)
Expand Down
19 changes: 19 additions & 0 deletions pymc3/backends/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import os
import re
import pandas as pd
import warnings

from ..backends import base, ndarray
from . import tracetab as ttab
Expand All @@ -57,6 +58,12 @@ class Text(base.BaseTrace):
"""

def __init__(self, name, model=None, vars=None, test_point=None):
warnings.warn(
'The `Text` backend will soon be removed. '
'Please switch to a different backend. '
'If you have good reasons for using the Text backend, file an issue and tell us about them. ',
DeprecationWarning,
)
if not os.path.exists(name):
os.mkdir(name)
super().__init__(name, model, vars, test_point)
Expand Down Expand Up @@ -185,6 +192,12 @@ def load(name, model=None):
-------
A MultiTrace instance
"""
warnings.warn(
'The `load` function will soon be removed. '
'Please use ArviZ to save traces. '
'If you have good reasons for using the `load` function, file an issue and tell us about them. ',
DeprecationWarning,
)
files = glob(os.path.join(name, 'chain-*.csv'))

if len(files) == 0:
Expand Down Expand Up @@ -224,6 +237,12 @@ def dump(name, trace, chains=None):
chains: list
Chains to dump. If None, all chains are dumped.
"""
warnings.warn(
'The `dump` function will soon be removed. '
'Please use ArviZ to save traces. '
'If you have good reasons for using the `dump` function, file an issue and tell us about them. ',
DeprecationWarning,
)
if not os.path.exists(name):
os.mkdir(name)
if chains is None:
Expand Down
7 changes: 7 additions & 0 deletions pymc3/backends/tracetab.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import numpy as np
import pandas as pd
import warnings

from ..util import get_default_varnames

Expand All @@ -39,6 +40,12 @@ def trace_to_dataframe(trace, chains=None, varnames=None, include_transformed=Fa
If true transformed variables will be included in the resulting
DataFrame.
"""
warnings.warn(
'The `trace_to_dataframe` function will soon be removed. '
'Please use ArviZ to save traces. '
'If you have good reasons for using the `trace_to_dataframe` function, file an issue and tell us about them. ',
DeprecationWarning,
)
var_shapes = trace._straces[0].var_shapes

if varnames is None:
Expand Down
2 changes: 1 addition & 1 deletion pymc3/examples/samplers_mvnormal.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def run(steppers, p):
runtimes[name] = time.time() - t_start
print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains))
traces[name] = mt
en = pm.diagnostics.effective_n(mt)
en = pm.ess(mt)
print('effective: {}\r\n'.format(en))
if USE_XY:
effn[name] = np.mean(en['x']) / len(mt) / mt.nchains
Expand Down
70 changes: 32 additions & 38 deletions pymc3/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,16 +334,42 @@ def sample(
Notes
-----
Optional keyword arguments can be passed to ``sample`` to be delivered to the
``step_method``s used during sampling. In particular, the NUTS step method accepts
a number of arguments. Common options are:
``step_method``s used during sampling.

* target_accept: float in [0, 1]. The step size is tuned such that we approximate this
acceptance rate. Higher values like 0.9 or 0.95 often work better for problematic
posteriors.
* max_treedepth: The maximum depth of the trajectory tree.
If your model uses only one step method, you can address step method kwargs
directly. In particular, the NUTS step method has several options including:

* target_accept: float in [0, 1]. The step size is tuned such that we
approximate this acceptance rate. Higher values like 0.9 or 0.95 often
work better for problematic posteriors
* max_treedepth: The maximum depth of the trajectory tree
* step_scale: float, default 0.25
The initial guess for the step size scaled down by :math:`1/n**(1/4)`

If your model uses multiple step methods, aka a Compound Step, then you have
two ways to address arguments to each step method:

A: If you let ``sample()`` automatically assign the ``step_method``s,
and you can correctly anticipate what they will be, then you can wrap
step method kwargs in a dict and pass that to sample() with a kwarg set
to the name of the step method.
e.g. for a CompoundStep comprising NUTS and BinaryGibbsMetropolis,
you could send:
1. ``target_accept`` to NUTS: nuts={'target_accept':0.9}
2. ``transit_p`` to BinaryGibbsMetropolis: binary_gibbs_metropolis={'transit_p':.7}

Note that available names are:
``nuts``, ``hmc``, ``metropolis``, ``binary_metropolis``,
``binary_gibbs_metropolis``, ``categorical_gibbs_metropolis``,
``DEMetropolis``, ``DEMetropolisZ``, ``slice``

B: If you manually declare the ``step_method``s, within the ``step``
kwarg, then you can address the ``step_method`` kwargs directly.
e.g. for a CompoundStep comprising NUTS and BinaryGibbsMetropolis,
you could send:
step=[pm.NUTS([freeRV1, freeRV2], target_accept=0.9),
pm.BinaryGibbsMetropolis([freeRV3], transit_p=.7)]

You can find a full list of arguments in the docstring of the step methods.

Examples
Expand All @@ -368,36 +394,9 @@ def sample(
"""
model = modelcontext(model)

nuts_kwargs = kwargs.pop("nuts_kwargs", None)
if nuts_kwargs is not None:
warnings.warn(
"The nuts_kwargs argument has been deprecated. Pass step "
"method arguments directly to sample instead",
DeprecationWarning,
)
kwargs.update(nuts_kwargs)
step_kwargs = kwargs.pop("step_kwargs", None)
if step_kwargs is not None:
warnings.warn(
"The step_kwargs argument has been deprecated. Pass step "
"method arguments directly to sample instead",
DeprecationWarning,
)
kwargs.update(step_kwargs)

if cores is None:
cores = min(4, _cpu_count())

if "njobs" in kwargs:
cores = kwargs["njobs"]
warnings.warn(
"The njobs argument has been deprecated. Use cores instead.", DeprecationWarning
)
if "nchains" in kwargs:
chains = kwargs["nchains"]
warnings.warn(
"The nchains argument has been deprecated. Use chains instead.", DeprecationWarning
)
if chains is None:
chains = max(2, cores)
if isinstance(start, dict):
Expand All @@ -412,11 +411,6 @@ def sample(
random_seed = [np.random.randint(2 ** 30) for _ in range(chains)]
if not isinstance(random_seed, Iterable):
raise TypeError("Invalid value for `random_seed`. Must be tuple, list or int")
if "chain" in kwargs:
chain_idx = kwargs["chain"]
warnings.warn(
"The chain argument has been deprecated. Use chain_idx instead.", DeprecationWarning
)

if start is not None:
for start_vals in start:
Expand Down
15 changes: 0 additions & 15 deletions pymc3/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,6 @@ def wrapped(*args, **kwargs):
waic = map_args(az.waic)


def gelman_rubin(*args, **kwargs):
warnings.warn("gelman_rubin has been deprecated. In the future, use rhat instead.")
return rhat(*args, **kwargs)

gelman_rubin.__doc__ = rhat.__doc__


def effective_n(*args, **kwargs):
warnings.warn("effective_n has been deprecated. In the future, use ess instead.")
return ess(*args, **kwargs)

effective_n.__doc__ = ess.__doc__

__all__ = [
"bfmi",
"compare",
Expand All @@ -78,6 +65,4 @@ def effective_n(*args, **kwargs):
"rhat",
"summary",
"waic",
"gelman_rubin", # deprecated, remove after 3.8
"effective_n", # deprecated, remove after 3.8
]
4 changes: 2 additions & 2 deletions pymc3/tests/sampler_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,12 @@ def setup_class(cls):

def test_neff(self):
if hasattr(self, 'min_n_eff'):
n_eff = pm.effective_n(self.trace[self.burn:])
n_eff = pm.ess(self.trace[self.burn:])
for var in n_eff:
npt.assert_array_less(self.min_n_eff, n_eff[var])

def test_Rhat(self):
rhat = pm.gelman_rubin(self.trace[self.burn:])
rhat = pm.rhat(self.trace[self.burn:])
for var in rhat:
npt.assert_allclose(rhat[var], 1, rtol=0.01)

Expand Down
2 changes: 1 addition & 1 deletion pymc3/tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_sample_args(self):
assert "'foo'" in str(excinfo.value)

with pytest.raises(ValueError) as excinfo:
pm.sample(50, tune=0, init=None, step_kwargs={"foo": {}})
pm.sample(50, tune=0, init=None, foo={})
assert "foo" in str(excinfo.value)

with pytest.raises(ValueError) as excinfo:
Expand Down