From 9149a3e8ae4354db928211cbdf6b882caff3a1bc Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Tue, 31 May 2022 17:15:28 +0200
Subject: [PATCH] Use a wider default init_dist in GaussianRandomWalk and AR
 and raise UserWarning when not explicitly defined

---
 pymc/distributions/timeseries.py            | 24 +++++++++++++++------
 pymc/tests/test_distributions.py            |  2 +-
 pymc/tests/test_distributions_timeseries.py |  6 +++---
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py
index 224314577bf..29f3d75c940 100644
--- a/pymc/distributions/timeseries.py
+++ b/pymc/distributions/timeseries.py
@@ -227,7 +227,6 @@ class GaussianRandomWalk(distribution.Continuous):
         sigma > 0, innovation standard deviation, defaults to 1.0
     init_dist : unnamed distribution
         Univariate distribution of the initial value, created with the `.dist()` API.
-        Defaults to a unit Normal.
 
         .. warning:: init will be cloned, rendering them independent of the ones passed as input.
 
@@ -274,7 +273,12 @@ def dist(
 
         # If no scalar distribution is passed then initialize with a Normal of same mu and sigma
         if init_dist is None:
-            init_dist = Normal.dist(0, 1)
+            warnings.warn(
+                "Initial distribution not specified, defaulting to `Normal.dist(0, 100)`. "
+                "You can specify an init_dist manually to suppress this warning.",
+                UserWarning,
+            )
+            init_dist = Normal.dist(0, 100)
         else:
             if not (
                 isinstance(init_dist, at.TensorVariable)
@@ -369,10 +373,10 @@ class AR(SymbolicDistribution):
     constant: bool, optional
         Whether the first element of rho should be used as a constant term in the AR
         process. Defaults to False
-    init_dist: unnamed distribution, optional
-        Scalar or vector distribution for initial values. Defaults to a unit Normal.
-        Distribution should be created via the `.dist()` API, and have dimension
-        (*size, ar_order). If not, it will be automatically resized.
+    init_dist: unnamed distribution
+        Scalar or vector distribution for initial values. Distribution should be
+        created via the `.dist()` API, and have dimension (*size, ar_order). If not,
+        it will be automatically resized.
 
         .. warning:: init_dist will be cloned, rendering it independent of the one passed as input.
 
@@ -461,7 +465,13 @@ def dist(
                     f"got ndim_supp={init_dist.owner.op.ndim_supp}.",
                 )
         else:
-            init_dist = Normal.dist(0, 1, size=(*sigma.shape, ar_order))
+            warnings.warn(
+                "Initial distribution not specified, defaulting to "
+                "`Normal.dist(0, 100, shape=...)`. You can specify an init_dist "
+                "manually to suppress this warning.",
+                UserWarning,
+            )
+            init_dist = Normal.dist(0, 100, size=(*sigma.shape, ar_order))
 
         # Tell Aeppl to ignore init_dist, as it will be accounted for in the logp term
         init_dist = ignore_logprob(init_dist)
diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py
index c805e01f12c..ba161140482 100644
--- a/pymc/tests/test_distributions.py
+++ b/pymc/tests/test_distributions.py
@@ -2610,7 +2610,7 @@ def test_gaussianrandomwalk(self):
         def ref_logp(value, mu, sigma, steps):
             # Relying on fact that init will be normal by default
             return (
-                scipy.stats.norm.logpdf(value[0])
+                scipy.stats.norm.logpdf(value[0], 0, 100)  # default init_dist has a scale of 100
                 + scipy.stats.norm.logpdf(np.diff(value), mu, sigma).sum()
             )
 
diff --git a/pymc/tests/test_distributions_timeseries.py b/pymc/tests/test_distributions_timeseries.py
index 500f6f2b508..e3d8e671c4b 100644
--- a/pymc/tests/test_distributions_timeseries.py
+++ b/pymc/tests/test_distributions_timeseries.py
@@ -310,11 +310,11 @@ def test_batched_rhos(self):
         y_tp = np.random.randn(batch_size, steps)
         with Model() as t0:
             beta = Normal("beta", 0.0, 1.0, shape=(batch_size, ar_order), initval=beta_tp)
-            AR("y", beta, sigma=1.0, shape=(batch_size, steps), initval=y_tp)
+            AR("y", beta, sigma=1.0, init_dist=Normal.dist(0, 1), shape=(batch_size, steps), initval=y_tp)
         with Model() as t1:
             beta = Normal("beta", 0.0, 1.0, shape=(batch_size, ar_order), initval=beta_tp)
             for i in range(batch_size):
-                AR(f"y_{i}", beta[i], sigma=1.0, shape=steps, initval=y_tp[i])
+                AR(f"y_{i}", beta[i], sigma=1.0, init_dist=Normal.dist(0, 1), shape=steps, initval=y_tp[i])
 
         np.testing.assert_allclose(
             t0.compile_logp()(t0.initial_point()),
@@ -379,7 +379,7 @@ def test_batched_init_dist(self):
         beta_tp = aesara.shared(np.random.randn(ar_order), shape=(3,))
         y_tp = np.random.randn(batch_size, steps)
         with Model() as t0:
-            init_dist = Normal.dist(0.0, 1.0, size=(batch_size, ar_order))
+            init_dist = Normal.dist(0.0, 100.0, size=(batch_size, ar_order))
             AR("y", beta_tp, sigma=0.01, init_dist=init_dist, steps=steps, initval=y_tp)
         with Model() as t1:
             for i in range(batch_size):
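
Note for reviewers: a minimal sketch of the user-facing behavior this patch introduces. This is a hypothetical snippet against the v4-era API shown in the diff; the model context, variable names, and parameter values are made up for illustration.

    import pymc as pm

    with pm.Model():
        # No init_dist: after this patch the initial value falls back to the
        # wide Normal.dist(0, 100) default and the new UserWarning is emitted.
        grw_default = pm.GaussianRandomWalk("grw_default", mu=0.0, sigma=1.0, steps=10)

        # Passing init_dist explicitly suppresses the warning and recovers
        # the previous unit-Normal prior on the initial value.
        grw_explicit = pm.GaussianRandomWalk(
            "grw_explicit", mu=0.0, sigma=1.0, steps=10, init_dist=pm.Normal.dist(0, 1)
        )

        # The same applies to AR: specify init_dist to keep the old behavior.
        ar = pm.AR("ar", rho=[0.9], sigma=1.0, steps=10, init_dist=pm.Normal.dist(0, 1))

The test updates mirror this split: tests that depended on the old unit-Normal default now pin init_dist=Normal.dist(0, 1) explicitly, while tests exercising the default update their expected scale to 100.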