diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ce102602cf4..7600df05a1b 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -13,11 +13,13 @@ jobs: floatx: [float32, float64] test-subset: # Tests are split into multiple jobs to accelerate the CI. + # Different jobs should be organized to take approximately the same + # time to complete (and not be prohibitely slow) # # How this works: # 1st block: Only passes --ignore parameters to pytest. # → pytest will run all test_*.py files that are NOT ignored. - # Other blocks: Only pass paths to test files. + # Subsequent blocks: Only pass paths to test files. # → pytest will run only these files # # Any test that was not ignored runs in the first job. @@ -30,7 +32,6 @@ jobs: --ignore=pymc3/tests/test_modelcontext.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_profile.py - --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_step.py --ignore=pymc3/tests/test_tuning.py --ignore=pymc3/tests/test_types.py diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 0e220ea2ac1..81c7a089726 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -7,7 +7,8 @@ - The GLM submodule has been removed, please use [Bambi](https://bambinos.github.io/bambi/) instead. - The `Distribution` keyword argument `testval` has been deprecated in favor of `initval`. - `pm.sample` now returns results as `InferenceData` instead of `MultiTrace` by default (see [#4744](https://github.com/pymc-devs/pymc3/pull/4744)). -- ... +- `pm.sample_prior_predictive` no longer returns transformed variable values by default. Pass them by name in `var_names` if you want to obtain these draws (see [4769](https://github.com/pymc-devs/pymc3/pull/4769)). +... ### New Features - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. diff --git a/pymc3/sampling.py b/pymc3/sampling.py index a24ca8d259e..44128ae7152 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1943,7 +1943,8 @@ def sample_prior_predictive( model : Model (optional if in ``with`` context) var_names : Iterable[str] A list of names of variables for which to compute the posterior predictive - samples. Defaults to both observed and unobserved RVs. + samples. Defaults to both observed and unobserved RVs. Transformed values + are not included unless explicitly defined in var_names. random_seed : int Seed for the random number generator. mode: @@ -1983,8 +1984,26 @@ def sample_prior_predictive( ) names = get_default_varnames(vars_, include_transformed=False) - vars_to_sample = [model[name] for name in names] + + # Any variables from var_names that are missing must be transformed variables. + # Misspelled variables would have raised a KeyError above. + missing_names = vars_.difference(names) + for name in missing_names: + transformed_value_var = model[name] + rv_var = model.values_to_rvs[transformed_value_var] + transform = transformed_value_var.tag.transform + transformed_rv_var = transform.forward(rv_var, rv_var) + + names.append(name) + vars_to_sample.append(transformed_rv_var) + + # If the user asked for the transformed variable in var_names, but not the + # original RV, we add it manually here + if rv_var.name not in names: + names.append(rv_var.name) + vars_to_sample.append(rv_var) + inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)] sampler_fn = compile_rv_inplace( diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 07470dadf8d..9ac914e335b 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + from collections import OrderedDict import aesara.tensor as at import numpy as np +from aesara import config from aesara import function as aesara_function from scipy.special import logsumexp from scipy.stats import multivariate_normal @@ -87,7 +90,7 @@ def initialize_population(self): if self.start is None: init_rnd = sample_prior_predictive( self.draws, - var_names=[v.name for v in self.model.unobserved_RVs], + var_names=[v.name for v in self.model.unobserved_value_vars], model=self.model, ) else: @@ -290,9 +293,21 @@ def logp_forw(point, out_vars, vars, shared): shared: List containing :class:`aesara.tensor.Tensor` for depended shared data """ + out_list, inarray0 = join_nonshared_inputs(point, out_vars, vars, shared) - f = aesara_function([inarray0], out_list[0]) - f.trust_input = True + # TODO: Figure out how to safely accept float32 (floatX) input when there are + # discrete variables of int64 dtype in `vars`. + # See https://github.com/pymc-devs/pymc3/pull/4769#issuecomment-861494080 + if config.floatX == "float32" and any(var.dtype == "int64" for var in vars): + warnings.warn( + "SMC sampling may run slower due to the presence of discrete variables " + "together with aesara.config.floatX == `float32`", + UserWarning, + ) + f = aesara_function([inarray0], out_list[0], allow_input_downcast=True) + else: + f = aesara_function([inarray0], out_list[0]) + f.trust_input = False return f diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 79ec2f0ed63..f8d4e74e4ed 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -1076,6 +1076,66 @@ def test_potentials_warning(self): with pytest.warns(UserWarning, match=warning_msg): pm.sample_prior_predictive(samples=5) + def test_transformed_vars(self): + # Test that prior predictive returns transformation of RVs when these are + # passed explicitly in `var_names` + + def ub_interval_forward(x, ub): + # Interval transform assuming lower bound is zero + return np.log(x - 0) - np.log(ub - x) + + with pm.Model(rng_seeder=123) as model: + ub = pm.HalfNormal("ub", 10) + x = pm.Uniform("x", 0, ub) + + prior = pm.sample_prior_predictive( + var_names=["ub", "ub_log__", "x", "x_interval__"], + samples=10, + ) + + # Check values are correct + assert np.allclose(prior["ub_log__"], np.log(prior["ub"])) + assert np.allclose( + prior["x_interval__"], + ub_interval_forward(prior["x"], prior["ub"]), + ) + + # Check that it works when the original RVs are not mentioned in var_names + with pm.Model(rng_seeder=123) as model_transformed_only: + ub = pm.HalfNormal("ub", 10) + x = pm.Uniform("x", 0, ub) + + prior_transformed_only = pm.sample_prior_predictive( + var_names=["ub_log__", "x_interval__"], + samples=10, + ) + assert "ub" not in prior_transformed_only and "x" not in prior_transformed_only + assert np.allclose(prior["ub_log__"], prior_transformed_only["ub_log__"]) + assert np.allclose(prior["x_interval__"], prior_transformed_only["x_interval__"]) + + def test_issue_4490(self): + # Test that samples do not depend on var_name order or, more fundamentally, + # that they do not depend on the set order used inside `sample_prior_predictive` + seed = 4490 + with pm.Model(rng_seeder=seed) as m1: + a = pm.Normal("a") + b = pm.Normal("b") + c = pm.Normal("c") + d = pm.Normal("d") + prior1 = pm.sample_prior_predictive(samples=1, var_names=["a", "b", "c", "d"]) + + with pm.Model(rng_seeder=seed) as m2: + a = pm.Normal("a") + b = pm.Normal("b") + c = pm.Normal("c") + d = pm.Normal("d") + prior2 = pm.sample_prior_predictive(samples=1, var_names=["b", "a", "d", "c"]) + + assert prior1["a"] == prior2["a"] + assert prior1["b"] == prior2["b"] + assert prior1["c"] == prior2["c"] + assert prior1["d"] == prior2["d"] + class TestSamplePosteriorPredictive: def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index 980dace8115..f6f01e2e484 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import aesara.tensor as at import numpy as np import pytest @@ -97,7 +98,16 @@ def test_start(self): } trace = pm.sample_smc(500, start=start) + def test_slowdown_warning(self): + with aesara.config.change_flags(floatX="float32"): + with pytest.warns(UserWarning, match="SMC sampling may run slower due to"): + with pm.Model() as model: + a = pm.Poisson("a", 5) + y = pm.Normal("y", a, 5, observed=[1, 2, 3, 4]) + trace = pm.sample_smc() + +@pytest.mark.xfail(reason="SMC-ABC not refactored yet") class TestSMCABC(SeededTest): def setup_class(self): super().setup_class()