From 687f044bfcf37f58a410ea21509174d7a3ea5fc0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 14 Jun 2021 14:45:35 +0200 Subject: [PATCH 1/5] Enable prior_predictive to return transformed values --- RELEASE-NOTES.md | 3 ++- pymc3/sampling.py | 23 ++++++++++++++++++++-- pymc3/tests/test_sampling.py | 37 ++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 0e220ea2ac1..81c7a089726 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -7,7 +7,8 @@ - The GLM submodule has been removed, please use [Bambi](https://bambinos.github.io/bambi/) instead. - The `Distribution` keyword argument `testval` has been deprecated in favor of `initval`. - `pm.sample` now returns results as `InferenceData` instead of `MultiTrace` by default (see [#4744](https://github.com/pymc-devs/pymc3/pull/4744)). -- ... +- `pm.sample_prior_predictive` no longer returns transformed variable values by default. Pass them by name in `var_names` if you want to obtain these draws (see [4769](https://github.com/pymc-devs/pymc3/pull/4769)). +... ### New Features - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. diff --git a/pymc3/sampling.py b/pymc3/sampling.py index a24ca8d259e..44128ae7152 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1943,7 +1943,8 @@ def sample_prior_predictive( model : Model (optional if in ``with`` context) var_names : Iterable[str] A list of names of variables for which to compute the posterior predictive - samples. Defaults to both observed and unobserved RVs. + samples. Defaults to both observed and unobserved RVs. Transformed values + are not included unless explicitly defined in var_names. random_seed : int Seed for the random number generator. mode: @@ -1983,8 +1984,26 @@ def sample_prior_predictive( ) names = get_default_varnames(vars_, include_transformed=False) - vars_to_sample = [model[name] for name in names] + + # Any variables from var_names that are missing must be transformed variables. + # Misspelled variables would have raised a KeyError above. + missing_names = vars_.difference(names) + for name in missing_names: + transformed_value_var = model[name] + rv_var = model.values_to_rvs[transformed_value_var] + transform = transformed_value_var.tag.transform + transformed_rv_var = transform.forward(rv_var, rv_var) + + names.append(name) + vars_to_sample.append(transformed_rv_var) + + # If the user asked for the transformed variable in var_names, but not the + # original RV, we add it manually here + if rv_var.name not in names: + names.append(rv_var.name) + vars_to_sample.append(rv_var) + inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)] sampler_fn = compile_rv_inplace( diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 79ec2f0ed63..0e5b2e07526 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -1076,6 +1076,43 @@ def test_potentials_warning(self): with pytest.warns(UserWarning, match=warning_msg): pm.sample_prior_predictive(samples=5) + def test_transformed_vars(self): + # Test that prior predictive returns transformation of RVs when these are + # passed explicitly in `var_names` + + def ub_interval_forward(x, ub): + # Interval transform assuming lower bound is zero + return np.log(x - 0) - np.log(ub - x) + + with pm.Model(rng_seeder=123) as model: + ub = pm.HalfNormal("ub", 10) + x = pm.Uniform("x", 0, ub) + + prior = pm.sample_prior_predictive( + var_names=["ub", "ub_log__", "x", "x_interval__"], + samples=10, + ) + + # Check values are correct + assert np.allclose(prior["ub_log__"], np.log(prior["ub"])) + assert np.allclose( + prior["x_interval__"], + ub_interval_forward(prior["x"], prior["ub"]), + ) + + # Check that it works when the original RVs are not mentioned in var_names + with pm.Model(rng_seeder=123) as model_transformed_only: + ub = pm.HalfNormal("ub", 10) + x = pm.Uniform("x", 0, ub) + + prior_transformed_only = pm.sample_prior_predictive( + var_names=["ub_log__", "x_interval__"], + samples=10, + ) + assert "ub" not in prior_transformed_only and "x" not in prior_transformed_only + assert np.allclose(prior["ub_log__"], prior_transformed_only["ub_log__"]) + assert np.allclose(prior["x_interval__"], prior_transformed_only["x_interval__"]) + class TestSamplePosteriorPredictive: def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): From 2eb31935ef58ea299a8085bc91670fc6f81a9b30 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 14 Jun 2021 14:51:54 +0200 Subject: [PATCH 2/5] Add test which closes #4490 --- pymc3/tests/test_sampling.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 0e5b2e07526..f8d4e74e4ed 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -1113,6 +1113,29 @@ def ub_interval_forward(x, ub): assert np.allclose(prior["ub_log__"], prior_transformed_only["ub_log__"]) assert np.allclose(prior["x_interval__"], prior_transformed_only["x_interval__"]) + def test_issue_4490(self): + # Test that samples do not depend on var_name order or, more fundamentally, + # that they do not depend on the set order used inside `sample_prior_predictive` + seed = 4490 + with pm.Model(rng_seeder=seed) as m1: + a = pm.Normal("a") + b = pm.Normal("b") + c = pm.Normal("c") + d = pm.Normal("d") + prior1 = pm.sample_prior_predictive(samples=1, var_names=["a", "b", "c", "d"]) + + with pm.Model(rng_seeder=seed) as m2: + a = pm.Normal("a") + b = pm.Normal("b") + c = pm.Normal("c") + d = pm.Normal("d") + prior2 = pm.sample_prior_predictive(samples=1, var_names=["b", "a", "d", "c"]) + + assert prior1["a"] == prior2["a"] + assert prior1["b"] == prior2["b"] + assert prior1["c"] == prior2["c"] + assert prior1["d"] == prior2["d"] + class TestSamplePosteriorPredictive: def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): From 4a731a51bc684b1e0ca896695db9fe940a3a20a9 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 14 Jun 2021 17:04:58 +0200 Subject: [PATCH 3/5] Fix SMC regression and re-enable `test_smc.py` --- .github/workflows/pytest.yml | 1 - pymc3/smc/smc.py | 2 +- pymc3/tests/test_smc.py | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ce102602cf4..d4de19923de 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -30,7 +30,6 @@ jobs: --ignore=pymc3/tests/test_modelcontext.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_profile.py - --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_step.py --ignore=pymc3/tests/test_tuning.py --ignore=pymc3/tests/test_types.py diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 07470dadf8d..321f41f3ea7 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -87,7 +87,7 @@ def initialize_population(self): if self.start is None: init_rnd = sample_prior_predictive( self.draws, - var_names=[v.name for v in self.model.unobserved_RVs], + var_names=[v.name for v in self.model.unobserved_value_vars], model=self.model, ) else: diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index 980dace8115..84bc51d419a 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -98,6 +98,7 @@ def test_start(self): trace = pm.sample_smc(500, start=start) +@pytest.mark.xfail(reason="SMC-ABC not refactored yet") class TestSMCABC(SeededTest): def setup_class(self): super().setup_class() From 22da46a483d6b919450626bb47693ab7bab5de24 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 15 Jun 2021 15:47:37 +0200 Subject: [PATCH 4/5] Minor changes to the `pytest.yml` comments --- .github/workflows/pytest.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d4de19923de..7600df05a1b 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -13,11 +13,13 @@ jobs: floatx: [float32, float64] test-subset: # Tests are split into multiple jobs to accelerate the CI. + # Different jobs should be organized to take approximately the same + # time to complete (and not be prohibitely slow) # # How this works: # 1st block: Only passes --ignore parameters to pytest. # → pytest will run all test_*.py files that are NOT ignored. - # Other blocks: Only pass paths to test files. + # Subsequent blocks: Only pass paths to test files. # → pytest will run only these files # # Any test that was not ignored runs in the first job. From 0ffdf22e4be091e4f9fb83a91eb55ffa409ee06d Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 16 Jun 2021 09:49:23 +0200 Subject: [PATCH 5/5] Add workaround for floatX == 'float32' and discrete variables --- pymc3/smc/smc.py | 19 +++++++++++++++++-- pymc3/tests/test_smc.py | 9 +++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 321f41f3ea7..9ac914e335b 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + from collections import OrderedDict import aesara.tensor as at import numpy as np +from aesara import config from aesara import function as aesara_function from scipy.special import logsumexp from scipy.stats import multivariate_normal @@ -290,9 +293,21 @@ def logp_forw(point, out_vars, vars, shared): shared: List containing :class:`aesara.tensor.Tensor` for depended shared data """ + out_list, inarray0 = join_nonshared_inputs(point, out_vars, vars, shared) - f = aesara_function([inarray0], out_list[0]) - f.trust_input = True + # TODO: Figure out how to safely accept float32 (floatX) input when there are + # discrete variables of int64 dtype in `vars`. + # See https://github.com/pymc-devs/pymc3/pull/4769#issuecomment-861494080 + if config.floatX == "float32" and any(var.dtype == "int64" for var in vars): + warnings.warn( + "SMC sampling may run slower due to the presence of discrete variables " + "together with aesara.config.floatX == `float32`", + UserWarning, + ) + f = aesara_function([inarray0], out_list[0], allow_input_downcast=True) + else: + f = aesara_function([inarray0], out_list[0]) + f.trust_input = False return f diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index 84bc51d419a..f6f01e2e484 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import aesara.tensor as at import numpy as np import pytest @@ -97,6 +98,14 @@ def test_start(self): } trace = pm.sample_smc(500, start=start) + def test_slowdown_warning(self): + with aesara.config.change_flags(floatX="float32"): + with pytest.warns(UserWarning, match="SMC sampling may run slower due to"): + with pm.Model() as model: + a = pm.Poisson("a", 5) + y = pm.Normal("y", a, 5, observed=[1, 2, 3, 4]) + trace = pm.sample_smc() + @pytest.mark.xfail(reason="SMC-ABC not refactored yet") class TestSMCABC(SeededTest):