diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 2c2abe1eb38..07790257d11 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -11,7 +11,7 @@
 - GP covariance functions can now be exponentiated by a scalar. See PR [#3852](https://github.com/pymc-devs/pymc3/pull/3852)
 - `sample_posterior_predictive` can now feed on `xarray.Dataset` - e.g. from `InferenceData.posterior`. (see [#3846](https://github.com/pymc-devs/pymc3/pull/3846))
 - `SamplerReport` (`MultiTrace.report`) now has properties `n_tune`, `n_draws`, `t_sampling` for increased convenience (see [#3827](https://github.com/pymc-devs/pymc3/pull/3827))
-- `pm.sample` now has support for adapting dense mass matrix using `QuadPotentialFullAdapt` (see [#3596](https://github.com/pymc-devs/pymc3/pull/3596), [#3705](https://github.com/pymc-devs/pymc3/pull/3705) and [#3858](https://github.com/pymc-devs/pymc3/pull/3858))
+- `pm.sample` now has support for adapting a dense mass matrix using `QuadPotentialFullAdapt` (see [#3596](https://github.com/pymc-devs/pymc3/pull/3596), [#3705](https://github.com/pymc-devs/pymc3/pull/3705), [#3858](https://github.com/pymc-devs/pymc3/pull/3858), and [#3893](https://github.com/pymc-devs/pymc3/pull/3893)). Pass `init="adapt_full"` or `init="jitter+adapt_full"` to enable it.
 - `Moyal` distribution added (see [#3870](https://github.com/pymc-devs/pymc3/pull/3870)).

 ### Maintenance
diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py
index 6acbad57b23..5035d7f08b4 100644
--- a/benchmarks/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks/benchmarks.py
@@ -146,7 +146,7 @@ class NUTSInitSuite:
     """Tests initializations for NUTS sampler on models
     """
     timeout = 360.0
-    params = ('adapt_diag', 'jitter+adapt_diag', 'advi+adapt_diag_grad')
+    params = ('adapt_diag', 'jitter+adapt_diag', 'jitter+adapt_full', 'adapt_full')
     number = 1
     repeat = 1
     draws = 10000
@@ -245,7 +245,7 @@ def freefall(y, t, p):
                               46.48, 48.18
                               ]).reshape(-1, 1)
-        
+
         ode_model = pm.ode.DifferentialEquation(func=freefall, times=times, n_states=1, n_theta=2, t0=0)
         with pm.Model() as model:
             # Specify prior distributions for some of our model parameters
@@ -255,12 +255,12 @@ def freefall(y, t, p):
             ode_solution = ode_model(y0=[0], theta=[gamma, 9.8])
             # The ode_solution has a shape of (n_times, n_states)
             Y = pm.Normal("Y", mu=ode_solution, sd=sigma, observed=y)
-        
+
             t0 = time.time()
             trace = pm.sample(500, tune=1000, chains=2, cores=2, random_seed=0)
             tot = time.time() - t0
             ess = pm.ess(trace)
             return np.mean([ess.sigma, ess.gamma]) / tot
-    
+

 DifferentialEquationSuite.track_1var_2par_ode_ess.unit = 'Effective samples per second'
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 32c4c6accfc..54e3b687d1b 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -1853,8 +1853,8 @@ def init_nuts(
     * adapt_diag: Start with a identity mass matrix and then adapt a diagonal based on the
       variance of the tuning samples. All chains use the test value (usually the prior mean)
       as starting point.
-    * jitter+adapt_diag: Same as ``adapt_diag``, but use uniform jitter in [-1, 1] as starting
-      point in each chain.
+    * jitter+adapt_diag: Same as ``adapt_diag``, but use test value plus a uniform jitter in
+      [-1, 1] as starting point in each chain.
     * advi+adapt_diag: Run ADVI and then adapt the resulting diagonal mass matrix based on the
       sample variance of the tuning samples.
     * advi+adapt_diag_grad: Run ADVI and then adapt the resulting diagonal mass matrix based
@@ -1863,7 +1863,10 @@ def init_nuts(
     * advi: Run ADVI to estimate posterior mean and diagonal mass matrix.
     * advi_map: Initialize ADVI with MAP and use MAP as starting point.
     * map: Use the MAP as starting point. This is discouraged.
-    * adapt_full: Adapt a dense mass matrix using the sample covariances
+    * adapt_full: Adapt a dense mass matrix using the sample covariances. All chains use the
+      test value (usually the prior mean) as starting point.
+    * jitter+adapt_full: Same as ``adapt_full``, but use test value plus a uniform jitter in
+      [-1, 1] as starting point in each chain.
     chains: int
         Number of jobs to start.
     n_init: int
@@ -2001,6 +2004,16 @@ def init_nuts(
         mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
         cov = np.eye(model.ndim)
         potential = quadpotential.QuadPotentialFullAdapt(model.ndim, mean, cov, 10)
+    elif init == 'jitter+adapt_full':
+        start = []
+        for _ in range(chains):
+            mean = {var: val.copy() for var, val in model.test_point.items()}
+            for val in mean.values():
+                val[...] += 2 * np.random.rand(*val.shape) - 1
+            start.append(mean)
+        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
+        cov = np.eye(model.ndim)
+        potential = quadpotential.QuadPotentialFullAdapt(model.ndim, mean, cov, 10)
     else:
         raise ValueError("Unknown initializer: {}.".format(init))

diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 527c944ab70..6ebc91f3231 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -675,6 +675,8 @@ def test_sample_posterior_predictive_w(self):
         "advi+adapt_diag_grad",
         "map",
         "advi_map",
+        "adapt_full",
+        "jitter+adapt_full",
     ],
 )
 def test_exec_nuts_init(method):
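For reference, a minimal usage sketch of the initializers this diff adds. The `init` strings (`"adapt_full"`, `"jitter+adapt_full"`) come straight from the diff above; the model itself is illustrative only and not part of the PR:

```python
import pymc3 as pm

# Toy model with strongly correlated parameters, the situation where a
# dense (full) mass matrix can help NUTS compared to the default
# diagonal adaptation.
with pm.Model() as model:
    x = pm.Normal("x", mu=0.0, sigma=1.0)
    y = pm.Normal("y", mu=x, sigma=0.1)

    # "jitter+adapt_full" starts each chain at the test value plus
    # uniform jitter in [-1, 1], then adapts a dense mass matrix from
    # the tuning samples via QuadPotentialFullAdapt.
    trace = pm.sample(
        draws=1000,
        tune=1000,
        chains=2,
        init="jitter+adapt_full",
        random_seed=0,
    )
```

Per the updated docstring, `init="adapt_full"` adapts the same dense mass matrix but starts every chain at the test value without jitter.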