From c6fd67a1b1d2f5cf832c6278af0a80a9628c7c76 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 12:05:33 -0700 Subject: [PATCH 01/11] Add initial VI --- birdman/model_base.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/birdman/model_base.py b/birdman/model_base.py index f9fd54e..0c01418 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -137,7 +137,21 @@ def add_parameters(self, param_dict: dict = None): param_dict = dict() self.dat.update(param_dict) - def fit_model( + def _fit_model_vi( + self, + sampler_args: dict = None + ): + if sampler_args is None: + sampler_args = dict() + + _fit = self.sm.variational( + data=self.dat, + seed=self.seed, + **sampler_args, + iter=self.num_iter + ) + + def _fit_model_mcmc( self, sampler_args: dict = None, convert_to_inference: bool = False @@ -179,6 +193,20 @@ def fit_model( category=UserWarning ) + def fit_model( + self, + method: str = "mcmc", + sampler_args: dict = None, + convert_to_inference: bool = False + ): + if method == "mcmc": + self._fit_model_mcmc(sampler_args, + convert_to_inference=convert_to_inference) + elif method == "vi": + self._fit_model_vi(sampler_args) + else: + raise ValueError("method must be either 'mcmc' or 'vi'") + @abstractmethod def to_inference(self): """Convert fitted model to az.InferenceData.""" From 02dba1ad042fe780042e241f3ca4e1fc94b55b36 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 12:27:13 -0700 Subject: [PATCH 02/11] VI working --- birdman/model_base.py | 2 ++ tests/test_vi.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/test_vi.py diff --git a/birdman/model_base.py b/birdman/model_base.py index 0c01418..dd58b30 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -151,6 +151,8 @@ def _fit_model_vi( iter=self.num_iter ) + self.fit = _fit + def _fit_model_mcmc( self, sampler_args: dict = None, diff --git a/tests/test_vi.py b/tests/test_vi.py new file mode 100644 index 0000000..8dfad32 --- /dev/null +++ b/tests/test_vi.py @@ -0,0 +1,36 @@ +from pkg_resources import resource_filename + +from birdman import (NegativeBinomial, NegativeBinomialLME, + NegativeBinomialSingle, ModelIterator) + +def test_nb_fit_vi(table_biom, metadata): + nb = NegativeBinomial( + table=table_biom, + formula="host_common_name", + metadata=metadata, + chains=4, + seed=42, + num_iter=500, + beta_prior=2.0, + inv_disp_sd=0.5, + ) + nb.compile_model() + nb.fit_model(method="vi", sampler_args={"output_samples": 100}) + sample_shape = nb.fit.variational_sample.shape + assert sample_shape[0] == 100 + + +def test_nb_single_fit_vi(table_biom, metadata): + md = metadata.copy() + for fid in table_biom.ids(axis="observation"): + nb = NegativeBinomialSingle( + table=table_biom, + feature_id=fid, + formula="host_common_name", + metadata=md, + num_iter=500, + ) + nb.compile_model() + nb.fit_model(method="vi", + sampler_args={"output_samples": 100, "grad_samples": 20}) + assert nb.fit.variational_sample.shape[0] == 100 From 814daa5b1a7fb49cc9e0336b598a19266e236aa8 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 13:06:28 -0700 Subject: [PATCH 03/11] Initial refactor of model fitting --- birdman/default_models.py | 16 ------ birdman/model_base.py | 103 +++++++++++++++++++++----------------- tests/test_vi.py | 17 ++++--- 3 files changed, 66 insertions(+), 70 deletions(-) diff --git a/birdman/default_models.py b/birdman/default_models.py index 8a6008a..6a20924 100644 --- a/birdman/default_models.py +++ b/birdman/default_models.py @@ -82,10 +82,6 @@ def __init__( table: biom.table.Table, formula: str, metadata: pd.DataFrame, - num_iter: int = 500, - num_warmup: int = None, - chains: int = 4, - seed: float = 42, beta_prior: float = 5.0, inv_disp_sd: float = 0.5, ): @@ -94,10 +90,6 @@ def __init__( super().__init__( table=table, model_path=filepath, - num_iter=num_iter, - num_warmup=num_warmup, - chains=chains, - seed=seed, ) self.create_regression(formula=formula, metadata=metadata) @@ -198,10 +190,6 @@ def __init__( feature_id: str, formula: str, metadata: pd.DataFrame, - num_iter: int = 500, - num_warmup: int = None, - chains: int = 4, - seed: float = 42, beta_prior: float = 5.0, inv_disp_sd: float = 0.5, ): @@ -211,10 +199,6 @@ def __init__( table=table, feature_id=feature_id, model_path=filepath, - num_iter=num_iter, - num_warmup=num_warmup, - chains=chains, - seed=seed, ) self.create_regression(formula=formula, metadata=metadata) diff --git a/birdman/model_base.py b/birdman/model_base.py index dd58b30..5bd7b90 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -20,36 +20,12 @@ class BaseModel(ABC): :param model_path: Filepath to Stan model :type model_path: str - - :param num_iter: Number of posterior sample draws, defaults to 500 - :type num_iter: int - - :param num_warmup: Number of posterior draws used for warmup, defaults to - num_iter - :type num_warmup: int - - :param chains: Number of chains to use in MCMC, defaults to 4 - :type chains: int - - :param seed: Random seed to use for sampling, defaults to 42 - :type seed: float """ def __init__( self, table: biom.table.Table, model_path: str, - num_iter: int = 500, - num_warmup: int = None, - chains: int = 4, - seed: float = 42, ): - self.num_iter = num_iter - if num_warmup is None: - self.num_warmup = num_iter - else: - self.num_warmup = num_warmup - self.chains = chains - self.seed = seed self.sample_names = table.ids(axis="sample") self.model_path = model_path self.sm = None @@ -139,24 +115,33 @@ def add_parameters(self, param_dict: dict = None): def _fit_model_vi( self, - sampler_args: dict = None + num_draws, + vi_iter, + vi_grad_samples, + vi_require_converged, + seed, + **vi_kwargs ): - if sampler_args is None: - sampler_args = dict() - _fit = self.sm.variational( data=self.dat, - seed=self.seed, - **sampler_args, - iter=self.num_iter + iter=vi_iter, + output_samples=num_draws, + grad_samples=vi_grad_samples, + require_converged=vi_require_converged, + seed=seed, + **vi_kwargs ) self.fit = _fit def _fit_model_mcmc( self, - sampler_args: dict = None, - convert_to_inference: bool = False + num_draws, + mcmc_warmup, + mcmc_chains, + seed, + convert_to_inference, + **mcmc_kwargs ): """Fit Stan model. @@ -168,17 +153,14 @@ def _fit_model_mcmc( inference given model specifications, defaults to False :type convert_to_inference: bool """ - if sampler_args is None: - sampler_args = dict() - _fit = self.sm.sample( - chains=self.chains, - parallel_chains=self.chains, + chains=mcmc_chains, + parallel_chains=mcmc_chains, data=self.dat, - iter_warmup=self.num_warmup, - iter_sampling=self.num_iter, - seed=self.seed, - **sampler_args + iter_warmup=mcmc_warmup, + iter_sampling=num_draws, + seed=seed, + **kwargs ) self.fit = _fit @@ -198,14 +180,41 @@ def _fit_model_mcmc( def fit_model( self, method: str = "mcmc", - sampler_args: dict = None, - convert_to_inference: bool = False + num_draws: int = 500, + mcmc_warmup: int = None, + mcmc_chains: int = 4, + vi_iter: int = 1000, + vi_grad_samples: int = 40, + vi_require_converged: bool = False, + seed: float = 42, + convert_to_inference: bool = False, + mcmc_kwargs: dict = None, + vi_kwargs: dict = None ): if method == "mcmc": - self._fit_model_mcmc(sampler_args, - convert_to_inference=convert_to_inference) + mcmc_kwargs = mcmc_kwargs or dict() + mcmc_warmup = mcmc_warmup or mcmc_warmup + + self._fit_model_mcmc( + chains=mcmc_chains, + parallel_chains=mcmc_chains, + iter_warmup=mcmc_warmup, + iter_sampling=num_draw, + convert_to_inference=convert_to_inference, + seed=seed, + **mcmc_kwargs + ) elif method == "vi": - self._fit_model_vi(sampler_args) + vi_kwargs = vi_kwargs or dict() + + self._fit_model_vi( + vi_iter=vi_iter, + num_draws=num_draws, + vi_grad_samples=vi_grad_samples, + vi_require_converged=vi_require_converged, + seed=seed, + **vi_kwargs + ) else: raise ValueError("method must be either 'mcmc' or 'vi'") diff --git a/tests/test_vi.py b/tests/test_vi.py index 8dfad32..f03c445 100644 --- a/tests/test_vi.py +++ b/tests/test_vi.py @@ -8,14 +8,15 @@ def test_nb_fit_vi(table_biom, metadata): table=table_biom, formula="host_common_name", metadata=metadata, - chains=4, - seed=42, - num_iter=500, beta_prior=2.0, inv_disp_sd=0.5, ) nb.compile_model() - nb.fit_model(method="vi", sampler_args={"output_samples": 100}) + nb.fit_model( + method="vi", + num_draws=100, + vi_iter=1000 + ) sample_shape = nb.fit.variational_sample.shape assert sample_shape[0] == 100 @@ -28,9 +29,11 @@ def test_nb_single_fit_vi(table_biom, metadata): feature_id=fid, formula="host_common_name", metadata=md, - num_iter=500, ) nb.compile_model() - nb.fit_model(method="vi", - sampler_args={"output_samples": 100, "grad_samples": 20}) + nb.fit_model( + method="vi", + vi_iter=1000, + num_draws=100, + ) assert nb.fit.variational_sample.shape[0] == 100 From 5729bd399d020aea5813c03ec90073a8055667d3 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 13:30:54 -0700 Subject: [PATCH 04/11] test_model works. Removing auto-conversion --- birdman/default_models.py | 8 -------- birdman/model_base.py | 32 ++++++++--------------------- tests/conftest.py | 8 ++------ tests/test_model.py | 43 +++++---------------------------------- 4 files changed, 15 insertions(+), 76 deletions(-) diff --git a/birdman/default_models.py b/birdman/default_models.py index 6a20924..58ed956 100644 --- a/birdman/default_models.py +++ b/birdman/default_models.py @@ -305,10 +305,6 @@ def __init__( formula: str, group_var: str, metadata: pd.DataFrame, - num_iter: int = 500, - num_warmup: int = None, - chains: int = 4, - seed: float = 42, beta_prior: float = 5.0, inv_disp_sd: float = 0.5, group_var_prior: float = 1.0 @@ -317,10 +313,6 @@ def __init__( super().__init__( table=table, model_path=filepath, - num_iter=num_iter, - num_warmup=num_warmup, - chains=chains, - seed=seed, ) self.create_regression(formula=formula, metadata=metadata) diff --git a/birdman/model_base.py b/birdman/model_base.py index 5bd7b90..4308154 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -132,7 +132,7 @@ def _fit_model_vi( **vi_kwargs ) - self.fit = _fit + return _fit def _fit_model_mcmc( self, @@ -140,7 +140,6 @@ def _fit_model_mcmc( mcmc_warmup, mcmc_chains, seed, - convert_to_inference, **mcmc_kwargs ): """Fit Stan model. @@ -160,22 +159,10 @@ def _fit_model_mcmc( iter_warmup=mcmc_warmup, iter_sampling=num_draws, seed=seed, - **kwargs + **mcmc_kwargs ) - self.fit = _fit - - # If auto-conversion fails, fit will be of type CmdStanMCMC - if convert_to_inference: - try: - self.fit = self.to_inference() - except Exception as e: - warnings.warn( - "Auto conversion to InferenceData has failed! fit has " - "been saved as CmdStanMCMC instead. See error message" - f": \n{type(e).__name__}: {e}", - category=UserWarning - ) + return _fit def fit_model( self, @@ -187,7 +174,6 @@ def fit_model( vi_grad_samples: int = 40, vi_require_converged: bool = False, seed: float = 42, - convert_to_inference: bool = False, mcmc_kwargs: dict = None, vi_kwargs: dict = None ): @@ -195,19 +181,17 @@ def fit_model( mcmc_kwargs = mcmc_kwargs or dict() mcmc_warmup = mcmc_warmup or mcmc_warmup - self._fit_model_mcmc( - chains=mcmc_chains, - parallel_chains=mcmc_chains, - iter_warmup=mcmc_warmup, - iter_sampling=num_draw, - convert_to_inference=convert_to_inference, + self.fit = self._fit_model_mcmc( + mcmc_chains=mcmc_chains, + mcmc_warmup=mcmc_warmup, + num_draws=num_draws, seed=seed, **mcmc_kwargs ) elif method == "vi": vi_kwargs = vi_kwargs or dict() - self._fit_model_vi( + self.fit = self._fit_model_vi( vi_iter=vi_iter, num_draws=num_draws, vi_grad_samples=vi_grad_samples, diff --git a/tests/conftest.py b/tests/conftest.py index ce150a1..4ee42cb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,11 +45,9 @@ def model(): table=tbl, formula="host_common_name", metadata=md, - num_iter=100, - chains=4, ) nb.compile_model() - nb.fit_model() + nb.fit_model(mcmc_chains=4, num_draws=100) return nb @@ -75,12 +73,10 @@ def single_feat_model(): formula="host_common_name", metadata=md, feature_id=id0, - num_iter=100, - chains=4, ) nb.compile_model() - nb.fit_model() + nb.fit_model(mcmc_chains=4, num_draws=100) return nb diff --git a/tests/test_model.py b/tests/test_model.py index 1f4bcaa..2195cb6 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -16,8 +16,6 @@ def test_nb_model(self, table_biom, metadata): table=table_biom, formula="host_common_name", metadata=metadata, - chains=4, - seed=42, beta_prior=2.0, inv_disp_sd=0.5, ) @@ -35,8 +33,8 @@ def test_nb_fit(self, table_biom, example_model): num_cov = 2 num_chains = 4 num_table_feats = 28 - num_iter = 100 - num_draws = num_chains*num_iter + num_draws = 100 + num_draws = num_chains*num_draws assert beta.shape == (num_draws, num_cov, num_table_feats - 1) assert inv_disp.shape == (num_draws, num_table_feats) @@ -50,10 +48,9 @@ def test_nb_lme(self, table_biom, metadata): formula="host_common_name", group_var="group", metadata=md, - num_iter=100, ) nb_lme.compile_model() - nb_lme.fit_model() + nb_lme.fit_model(num_draws=100) inf = nb_lme.to_inference() post = inf.posterior @@ -70,30 +67,9 @@ def test_single_feat(self, table_biom, metadata): feature_id=fid, formula="host_common_name", metadata=md, - num_iter=100, ) nb.compile_model() - nb.fit_model(convert_to_inference=True) - - def test_fail_auto_conversion(self, table_biom, metadata): - nb = NegativeBinomial( - table=table_biom, - metadata=metadata, - formula="host_common_name", - num_iter=100, - ) - nb.compile_model() - nb.specified = False - with pytest.warns(UserWarning) as r: - nb.fit_model(convert_to_inference=True) - - e = "Model has not been specified!" - expected_warning = ( - "Auto conversion to InferenceData has failed! fit has " - "been saved as CmdStanMCMC instead. See error message" - f": \nValueError: {e}" - ) - assert r[0].message.args[0] == expected_warning + nb.fit_model(num_draws=100) class TestToInference: @@ -129,9 +105,6 @@ def test_iteration(self, table_biom, metadata): model=NegativeBinomialSingle, formula="host_common_name", metadata=metadata, - num_iter=100, - chains=4, - seed=42 ) iterated_feature_ids = [] @@ -155,9 +128,6 @@ def test_chunks(self, table_biom, metadata): formula="host_common_name", num_chunks=3, metadata=metadata, - num_iter=100, - chains=4, - seed=42 ) tbl_fids = list(table_biom.ids("observation")) @@ -186,12 +156,9 @@ def test_iteration_fit(self, table_biom, metadata): model=NegativeBinomialSingle, formula="host_common_name", metadata=metadata, - num_iter=100, - chains=4, - seed=42 ) for fit, model in model_iterator: model.compile_model() - model.fit_model() + model.fit_model(num_draws=100, mcmc_chains=4, seed=42) _ = model.to_inference() From 51e881195f2921724abd1196931ae2c98fce8a91 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 13:32:43 -0700 Subject: [PATCH 05/11] test_custom_model passing --- tests/test_custom_model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_custom_model.py b/tests/test_custom_model.py index 66b194e..8a982d3 100644 --- a/tests/test_custom_model.py +++ b/tests/test_custom_model.py @@ -12,9 +12,6 @@ def test_custom_model(table_biom, metadata): custom_model = TableModel( table=table_biom, model_path=resource_filename("tests", "custom_model.stan"), - num_iter=100, - chains=4, - seed=42, ) custom_model.create_regression( formula="host_common_name", @@ -41,7 +38,7 @@ def test_custom_model(table_biom, metadata): }, ) custom_model.compile_model() - custom_model.fit_model() + custom_model.fit_model(num_draws=100, mcmc_chains=4, seed=42) inference = custom_model.to_inference() assert set(inference.groups()) == {"posterior", "sample_stats"} From b0e9769f8297ed80d33e94f8ec0626e7e629594f Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 13:36:08 -0700 Subject: [PATCH 06/11] Remove redundant fitting code --- birdman/model_base.py | 72 ++++++++----------------------------------- 1 file changed, 12 insertions(+), 60 deletions(-) diff --git a/birdman/model_base.py b/birdman/model_base.py index 4308154..2dbc079 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -113,57 +113,6 @@ def add_parameters(self, param_dict: dict = None): param_dict = dict() self.dat.update(param_dict) - def _fit_model_vi( - self, - num_draws, - vi_iter, - vi_grad_samples, - vi_require_converged, - seed, - **vi_kwargs - ): - _fit = self.sm.variational( - data=self.dat, - iter=vi_iter, - output_samples=num_draws, - grad_samples=vi_grad_samples, - require_converged=vi_require_converged, - seed=seed, - **vi_kwargs - ) - - return _fit - - def _fit_model_mcmc( - self, - num_draws, - mcmc_warmup, - mcmc_chains, - seed, - **mcmc_kwargs - ): - """Fit Stan model. - - :param sampler_args: Additional parameters to pass to CmdStanPy - sampler (optional) - :type sampler_args: dict - - :param convert_to_inference: Whether to automatically convert to - inference given model specifications, defaults to False - :type convert_to_inference: bool - """ - _fit = self.sm.sample( - chains=mcmc_chains, - parallel_chains=mcmc_chains, - data=self.dat, - iter_warmup=mcmc_warmup, - iter_sampling=num_draws, - seed=seed, - **mcmc_kwargs - ) - - return _fit - def fit_model( self, method: str = "mcmc", @@ -181,21 +130,24 @@ def fit_model( mcmc_kwargs = mcmc_kwargs or dict() mcmc_warmup = mcmc_warmup or mcmc_warmup - self.fit = self._fit_model_mcmc( - mcmc_chains=mcmc_chains, - mcmc_warmup=mcmc_warmup, - num_draws=num_draws, + self.fit = self.sm.sample( + chains=mcmc_chains, + parallel_chains=mcmc_chains, + data=self.dat, + iter_warmup=mcmc_warmup, + iter_sampling=num_draws, seed=seed, **mcmc_kwargs ) elif method == "vi": vi_kwargs = vi_kwargs or dict() - self.fit = self._fit_model_vi( - vi_iter=vi_iter, - num_draws=num_draws, - vi_grad_samples=vi_grad_samples, - vi_require_converged=vi_require_converged, + self.fit = self.sm.variational( + data=self.dat, + iter=vi_iter, + output_samples=num_draws, + grad_samples=vi_grad_samples, + require_converged=vi_require_converged, seed=seed, **vi_kwargs ) From 76f08956ee5913240d43b9a52a8a72270b304057 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 13:46:30 -0700 Subject: [PATCH 07/11] Reduce redundant to_inference code --- birdman/model_base.py | 69 ++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/birdman/model_base.py b/birdman/model_base.py index 2dbc079..add08ec 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -126,6 +126,44 @@ def fit_model( mcmc_kwargs: dict = None, vi_kwargs: dict = None ): + """Fit BIRDMAn model. + + :param method: Method by which to fit model, either 'mcmc' (default) + for Markov Chain Monte Carlo or 'vi' for Variational Inference + :type method: str + + :param num_draws: Number of output draws to sample from the posterior, + default is 500 + :type num_draws: int + + :param mcmc_warmup: Number of warmup iterations for MCMC sampling, + default is the same as num_draws + :type mcmc_warmup: int + + :param mcmc_chains: Number of Markov chains to use for sampling, + default is 4 + :type mcmc_chains: int + + :param vi_iter: Number of ADVI iterations to use for VI, default is + 1000 + :type vi_iter: int + + :param vi_grad_samples: Number of MC draws for computing the gradient, + default is 40 + :type vi_grad_samples: int + + :param vi_require_converged: Whether or not to raise an error if Stan + reports that “The algorithm may not have converged”, default is + False + :type vi_require_converged: bool + + :param seed: Random seed to use for sampling, default is 42 + :type seed: int + + :param mcmc_kwargs: kwargs to pass into CmdStanModel.sample + + :param vi_kwargs: kwargs to pass into CmdStanModel.variational + """ if method == "mcmc": mcmc_kwargs = mcmc_kwargs or dict() mcmc_warmup = mcmc_warmup or mcmc_warmup @@ -158,6 +196,17 @@ def fit_model( def to_inference(self): """Convert fitted model to az.InferenceData.""" + def _check_fit_for_inf(self): + if self.fit is None: + raise ValueError("Model has not been fit!") + + # if already Inference, just return + if isinstance(self.fit, az.InferenceData): + return self.fit + + if not self.specified: + raise ValueError("Model has not been specified!") + class TableModel(BaseModel): """Fit a model on the entire table at once.""" @@ -174,15 +223,7 @@ def to_inference(self) -> az.InferenceData: :returns: ``arviz`` InferenceData object with selected values :rtype: az.InferenceData """ - if self.fit is None: - raise ValueError("Model has not been fit!") - - # if already Inference, just return - if isinstance(self.fit, az.InferenceData): - return self.fit - - if not self.specified: - raise ValueError("Model has not been specified!") + self._check_fit_for_inf() inference = full_fit_to_inference( fit=self.fit, @@ -227,15 +268,7 @@ def to_inference(self) -> az.InferenceData: :returns: ``arviz`` InferenceData object with selected values :rtype: az.InferenceData """ - if self.fit is None: - raise ValueError("Model has not been fit!") - - # if already Inference, just return - if isinstance(self.fit, az.InferenceData): - return self.fit - - if not self.specified: - raise ValueError("Model has not been specified!") + self._check_fit_for_inf() inference = single_feature_fit_to_inference( fit=self.fit, From 6555a078a399e43f2fc9b5a310a27fb598f03dff Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 13:47:46 -0700 Subject: [PATCH 08/11] Update documentation for default models --- birdman/default_models.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/birdman/default_models.py b/birdman/default_models.py index 58ed956..6eeef9c 100644 --- a/birdman/default_models.py +++ b/birdman/default_models.py @@ -56,19 +56,6 @@ class NegativeBinomial(TableModel): :param metadata: Metadata for design matrix :type metadata: pd.DataFrame - :param num_iter: Number of posterior sample draws, defaults to 500 - :type num_iter: int - - :param num_warmup: Number of posterior draws used for warmup, defaults to - num_iter - :type num_warmup: int - - :param chains: Number of chains to use in MCMC, defaults to 4 - :type chains: int - - :param seed: Random seed to use for sampling, defaults to 42 - :type seed: float - :param beta_prior: Standard deviation for normally distributed prior values of beta, defaults to 5.0 :type beta_prior: float @@ -163,19 +150,6 @@ class NegativeBinomialSingle(SingleFeatureModel): :param metadata: Metadata for design matrix :type metadata: pd.DataFrame - :param num_iter: Number of posterior sample draws, defaults to 500 - :type num_iter: int - - :param num_warmup: Number of posterior draws used for warmup, defaults to - num_iter - :type num_warmup: int - - :param chains: Number of chains to use in MCMC, defaults to 4 - :type chains: int - - :param seed: Random seed to use for sampling, defaults to 42 - :type seed: float - :param beta_prior: Standard deviation for normally distributed prior values of beta, defaults to 5.0 :type beta_prior: float @@ -274,19 +248,6 @@ class NegativeBinomialLME(TableModel): :param metadata: Metadata for design matrix :type metadata: pd.DataFrame - :param num_iter: Number of posterior sample draws, defaults to 500 - :type num_iter: int - - :param num_warmup: Number of posterior draws used for warmup, defaults to - num_iter - :type num_warmup: int - - :param chains: Number of chains to use in MCMC, defaults to 4 - :type chains: int - - :param seed: Random seed to use for sampling, defaults to 42 - :type seed: float - :param beta_prior: Standard deviation for normally distributed prior values of beta, defaults to 5.0 :type beta_prior: float From 196c1fad50cc3b97059a3b0fd7d146c7f3beb799 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Fri, 8 Sep 2023 14:00:33 -0700 Subject: [PATCH 09/11] Update inference code --- birdman/{model_util.py => inference.py} | 21 +++---------------- birdman/model_base.py | 2 +- birdman/util.py | 18 ++++++++++++++++ .../{test_model_util.py => test_inference.py} | 2 +- 4 files changed, 23 insertions(+), 20 deletions(-) rename birdman/{model_util.py => inference.py} (90%) create mode 100644 birdman/util.py rename tests/{test_model_util.py => test_inference.py} (98%) diff --git a/birdman/model_util.py b/birdman/inference.py similarity index 90% rename from birdman/model_util.py rename to birdman/inference.py index 1197f88..d36de8b 100644 --- a/birdman/model_util.py +++ b/birdman/inference.py @@ -1,10 +1,11 @@ -import re -from typing import List, Sequence +from typing import List, Sequence, Union import arviz as az from cmdstanpy import CmdStanMCMC import xarray as xr +from .util import _drop_data + def full_fit_to_inference( fit: CmdStanMCMC, @@ -164,19 +165,3 @@ def concatenate_inferences( all_group_inferences.append(group_inf) return az.concat(*all_group_inferences) - - -def _drop_data( - dataset: xr.Dataset, - vars_to_drop: Sequence[str], -) -> xr.Dataset: - """Drop data and associated dimensions from inference group.""" - new_dataset = dataset.drop_vars(vars_to_drop) - # TODO: Figure out how to do this more cleanly - dims_to_drop = [] - for var in vars_to_drop: - for dim in new_dataset.dims: - if re.match(f"{var}_dim_\\d", dim): - dims_to_drop.append(dim) - new_dataset = new_dataset.drop_dims(dims_to_drop) - return new_dataset diff --git a/birdman/model_base.py b/birdman/model_base.py index add08ec..2ea28f3 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -9,7 +9,7 @@ import pandas as pd from patsy import dmatrix -from .model_util import full_fit_to_inference, single_feature_fit_to_inference +from .inference import full_fit_to_inference, single_feature_fit_to_inference class BaseModel(ABC): diff --git a/birdman/util.py b/birdman/util.py new file mode 100644 index 0000000..aba2441 --- /dev/null +++ b/birdman/util.py @@ -0,0 +1,18 @@ +import re +from typing import Sequence + +import arviz as az +import xarray as xr + + +def _drop_data(dataset: xr.Dataset, vars_to_drop: Sequence[str]) -> xr.Dataset: + """Drop data and associated dimensions from inference group.""" + new_dataset = dataset.drop_vars(vars_to_drop) + # TODO: Figure out how to do this more cleanly + dims_to_drop = [] + for var in vars_to_drop: + for dim in new_dataset.dims: + if re.match(f"{var}_dim_\\d", dim): + dims_to_drop.append(dim) + new_dataset = new_dataset.drop_dims(dims_to_drop) + return new_dataset diff --git a/tests/test_model_util.py b/tests/test_inference.py similarity index 98% rename from tests/test_model_util.py rename to tests/test_inference.py index 7b94c6f..06e89bc 100644 --- a/tests/test_model_util.py +++ b/tests/test_inference.py @@ -1,6 +1,6 @@ import numpy as np -from birdman import model_util as mu +from birdman import inference as mu class TestToInference: From a7b88040ad3274cb92b850096d049a86c660047b Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Tue, 12 Sep 2023 09:38:14 -0700 Subject: [PATCH 10/11] Fix flake8 issues --- birdman/inference.py | 2 +- birdman/model_base.py | 1 - birdman/util.py | 1 - tests/test_model.py | 1 - tests/test_vi.py | 4 +--- 5 files changed, 2 insertions(+), 7 deletions(-) diff --git a/birdman/inference.py b/birdman/inference.py index d36de8b..995c14d 100644 --- a/birdman/inference.py +++ b/birdman/inference.py @@ -1,4 +1,4 @@ -from typing import List, Sequence, Union +from typing import List, Sequence import arviz as az from cmdstanpy import CmdStanMCMC diff --git a/birdman/model_base.py b/birdman/model_base.py index 2ea28f3..1911f8f 100644 --- a/birdman/model_base.py +++ b/birdman/model_base.py @@ -1,7 +1,6 @@ from abc import ABC, abstractmethod from math import ceil from typing import Sequence -import warnings import arviz as az import biom diff --git a/birdman/util.py b/birdman/util.py index aba2441..0c5271c 100644 --- a/birdman/util.py +++ b/birdman/util.py @@ -1,7 +1,6 @@ import re from typing import Sequence -import arviz as az import xarray as xr diff --git a/tests/test_model.py b/tests/test_model.py index 2195cb6..d8d6131 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -2,7 +2,6 @@ from pkg_resources import resource_filename import numpy as np -import pytest from birdman import (NegativeBinomial, NegativeBinomialLME, NegativeBinomialSingle, ModelIterator) diff --git a/tests/test_vi.py b/tests/test_vi.py index f03c445..750fba9 100644 --- a/tests/test_vi.py +++ b/tests/test_vi.py @@ -1,7 +1,5 @@ -from pkg_resources import resource_filename +from birdman import NegativeBinomial, NegativeBinomialSingle -from birdman import (NegativeBinomial, NegativeBinomialLME, - NegativeBinomialSingle, ModelIterator) def test_nb_fit_vi(table_biom, metadata): nb = NegativeBinomial( From 3befd9e9cff555bba600ec808df16b1eaba3aa1b Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Tue, 12 Sep 2023 09:57:54 -0700 Subject: [PATCH 11/11] Fix custom model test --- tests/custom_model.stan | 35 ++++++++++++++++++++--------------- tests/test_custom_model.py | 10 ++++++---- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/tests/custom_model.stan b/tests/custom_model.stan index e67156b..1bf2b62 100644 --- a/tests/custom_model.stan +++ b/tests/custom_model.stan @@ -1,42 +1,47 @@ -# Separate priors for intercept and host_common_name data { int N; // number of samples int D; // number of dimensions + real A; // mean intercept int p; // number of covariates - real depth[N]; // sequencing depths of microbes + vector[N] depth; // sequencing depths of microbes matrix[N, p] x; // covariate matrix - int y[N, D]; // observed microbe abundances - real B_p_1; // stdev for Beta Normal prior - real B_p_2; // stdev for Beta Normal prior + array[N, D] int y; // observed microbe abundances + real B_p; // stdev for intercept real phi_s; // constant phi value } parameters { // parameters required for linear regression on the species means - matrix[p, D-1] beta_var; + row_vector[D-1] beta_0; + matrix[p-1, D-1] beta_x; + real inv_disp; } transformed parameters { + matrix[p, D-1] beta_var = append_row(beta_0, beta_x); matrix[N, D-1] lam; matrix[N, D] lam_clr; - vector[N] z; - z = to_vector(rep_array(0, N)); - lam = x * beta_var; - lam_clr = append_col(z, lam); + lam = x*beta_var; + for (n in 1:N){ + lam[n] += depth[n]; + } + lam_clr = append_col(to_vector(rep_array(0, N)), lam); } model { - // setting priors ... + inv_disp ~ lognormal(0, phi_s); + for (i in 1:D-1){ - beta_var[1, i] ~ normal(0., B_p_1); // uninformed prior - beta_var[2, i] ~ normal(0., B_p_2); // uninformed prior + for (j in 1:p){ + beta_var[j, i] ~ normal(0., B_p); // uninformed prior + } } + // generating counts for (n in 1:N){ for (i in 1:D){ - target += neg_binomial_2_log_lpmf(y[n, i] | depth[n] + lam_clr[n, i], phi_s); + target += neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv(inv_disp)); } } } - diff --git a/tests/test_custom_model.py b/tests/test_custom_model.py index 8a982d3..2da859a 100644 --- a/tests/test_custom_model.py +++ b/tests/test_custom_model.py @@ -7,8 +7,10 @@ def test_custom_model(table_biom, metadata): - # Negative binomial model with separate prior values for intercept & - # host_common_name effect and constant overdispersion parameter. + # Negative binomial model with constant overdispersion parameter. + D = table_biom.shape[0] + A = np.log(1 / D) + custom_model = TableModel( table=table_biom, model_path=resource_filename("tests", "custom_model.stan"), @@ -19,10 +21,10 @@ def test_custom_model(table_biom, metadata): ) custom_model.add_parameters( { - "B_p_1": 2.0, - "B_p_2": 5.0, + "B_p": 2.0, "phi_s": 0.2, "depth": np.log(table_biom.sum(axis="sample")), + "A": A } ) custom_model.specify_model(