From e1edd3db1ef723cc69771e4bb3a0ad53d1302de5 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Sun, 29 Oct 2023 11:51:16 -0700 Subject: [PATCH 1/2] Add new docs --- docs/custom_model.rst | 27 ++++++++++++++++++--------- docs/default_model_example.rst | 13 +++++++++---- docs/index.rst | 5 +++-- docs/inference.rst | 6 ++++++ docs/models.rst | 2 ++ 5 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 docs/inference.rst diff --git a/docs/custom_model.rst b/docs/custom_model.rst index 6d5f960..03bef66 100644 --- a/docs/custom_model.rst +++ b/docs/custom_model.rst @@ -25,14 +25,19 @@ We then import the data into Python so we can use BIRDMAn. import biom import pandas as pd + import glob + + fpath = glob.glob("templates/*.txt")[0] table = biom.load_table("BIOM/94270/reference-hit.biom") metadata = pd.read_csv( - "templates/11913_20191016-112545.txt", + fpath, sep="\t", index_col=0 ) + metadata.head() + Processing metadata ------------------- @@ -224,9 +229,6 @@ We will now pass this file along with our table, metadata, and formula into BIRD nb_lme = birdman.TableModel( table=filt_tbl, model_path="negative_binomial_re.stan", - num_iter=500, - chains=4, - seed=42 ) nb_lme.create_regression( metadata=metadata_model.loc[samps_to_keep], @@ -272,6 +274,7 @@ Now we can add all the necessary parameters to BIRDMAn with the ``add_parameters "depth": np.log(filt_tbl.sum(axis="sample")), "B_p": 3.0, "inv_disp_sd": 3.0, + "A": np.log(1 / filt_tbl.shape[0]), "u_p": 1.0 } nb_lme.add_parameters(param_dict) @@ -294,8 +297,8 @@ We pass all these arguments into the ``specify_model`` method of the ``Model`` o dims={ "beta_var": ["covariate", "feature_alr"], "inv_disp": ["feature"], - "subj_int": ["subject"], - "log_lik": ["tbl_sample", "feature"], + "subj_int": ["subject", "feature_alr"], + "log_lhood": ["tbl_sample", "feature"], "y_predict": ["tbl_sample", "feature"] }, coords={ @@ -306,7 +309,7 @@ We pass all these arguments into the ``specify_model`` method of the ``Model`` o "tbl_sample": nb_lme.sample_names }, posterior_predictive="y_predict", - log_likelihood="log_lik", + log_likelihood="log_lhood", include_observed_data=True ) @@ -319,7 +322,7 @@ Finally, we compile and fit the model. .. code-block:: python nb_lme.compile_model() - nb_lme.fit_model() + nb_lme.fit_model(method="vi", num_draws=500) Converting to ``InferenceData`` ------------------------------- @@ -329,7 +332,13 @@ When the model has finished fitting, you can convert to an inference data assumi .. code-block:: python from birdman.transform import posterior_alr_to_clr + inference = nb_lme.to_inference() - inference.posterior = posterior_alr_to_clr(inference.posterior) + inference.posterior = posterior_alr_to_clr( + inference.posterior, + alr_params=["subj_int", "beta_var"], + dim_replacement={"feature_alr": "feature"}, + new_labels=filt_tbl.ids("observation") + ) With this you can use the rest of the BIRDMAn suite as usual or directly interact with the ``arviz`` library! diff --git a/docs/default_model_example.rst b/docs/default_model_example.rst index b4d9f50..0563fa0 100644 --- a/docs/default_model_example.rst +++ b/docs/default_model_example.rst @@ -23,13 +23,16 @@ Next, we want to import the data into Python so we can run BIRDMAn. from birdman import NegativeBinomial - table = biom.load_table("BIOM/44773/otu_table.biom") + fpath = glob.glob("templates/*.txt")[0] + table = biom.load_table("BIOM/44773/reference-hit.biom") metadata = pd.read_csv( - "templates/107_20180101-113755.txt", + fpath, sep="\t", index_col=0 ) + metadata.head() + This table has nearly 2000 features, many of which are likely lowly prevalent. We are going to filter to only features that are present in at least 5 samples. .. code-block:: python @@ -44,11 +47,12 @@ For this example we're going to use a simple formula that only takes ``diet`` in .. code-block:: python + from birdman import NegativeBinomial + nb = NegativeBinomial( table=table_filt, formula="diet", metadata=metadata, - num_iter=1000, ) We then have to compile and fit our model. This is very straightforward in BIRDMAn. @@ -68,6 +72,7 @@ Now we have our parameter estimates which we can use in downstream analyses. Man .. code-block:: python from birdman.transform import posterior_alr_to_clr + inference = nb.to_inference() inference.posterior = posterior_alr_to_clr( inference.posterior, @@ -85,7 +90,7 @@ Finally, we'll plot the feature differentials and their standard deviations. We ax = viz.plot_parameter_estimates( inference, parameter="beta_var", - coord={"covariate": "diet[T.DIO]"}, + coords={"covariate": "diet[T.DIO]"}, ) .. image:: imgs/example_differentials.png diff --git a/docs/index.rst b/docs/index.rst index c0e4edd..efec3ee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,14 +14,14 @@ We provide several default models but also allow users to create their own stati Installation ------------ -.. note:: BIRDMAn requires Python >= 3.7 +.. note:: BIRDMAn requires Python >= 3.8 There are several dependencies you must install to use BIRDMAn. The easiest way to install the required dependencies is through ``conda`` or ``mamba``. .. code:: bash - conda install -c conda-forge biom-format patsy xarray arviz cmdstanpy + mamba install -c conda-forge biom-format patsy xarray arviz cmdstanpy pip install birdman If you are planning on contributing to BIRDMAn you must also install the following packages: @@ -53,6 +53,7 @@ If you are planning on contributing to BIRDMAn you must also install the followi :caption: API models + inference diagnostics summary transform diff --git a/docs/inference.rst b/docs/inference.rst new file mode 100644 index 0000000..45a082e --- /dev/null +++ b/docs/inference.rst @@ -0,0 +1,6 @@ +Inference Functions +=================== + +.. automodule:: birdman.inference + :members: + diff --git a/docs/models.rst b/docs/models.rst index 8dfd869..2c10ca0 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -14,6 +14,8 @@ These are the default models that are included in BIRDMAn. They should be usable :members: .. autoclass:: birdman.default_models.NegativeBinomialLME :members: +.. autoclass:: birdman.default_models.NegativeBinomialLMESingle + :members: Table Model ----------- From 1af6ba50590439d8c668f5ea2fefba2130b16e36 Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Sun, 29 Oct 2023 12:04:47 -0700 Subject: [PATCH 2/2] Updated docs --- birdman/inference.py | 55 ++++++++++++++++++++++++++++++++++++++++---- environment.yml | 3 +-- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/birdman/inference.py b/birdman/inference.py index b33b7d2..a376891 100644 --- a/birdman/inference.py +++ b/birdman/inference.py @@ -16,7 +16,36 @@ def fit_to_inference( dims: dict, posterior_predictive: str = None, log_likelihood: str = None, -): +) -> az.InferenceData: + """Convert a fitted model to an arviz InferenceData object. + + :param fit: Fitted CmdStan model + :type fit: Either CmdStanMCMC or CmdStanVB + + :param chains: Number of chains + :type chains: int + + :param draws: Number of draws + :type draws: int + + :param params: Parameters to include in inference + :type params: Sequence[str] + + :param coords: Coordinates for InferenceData + :type coords: dict + + :param dims: Dimensions for InferenceData + :type dims: dict + + :param posterior_predictive: Name of posterior predictive var in model + :type posterior_predictive: str + + :param log_likelihood: Name of log likelihood var in model + :type log_likelihood: str + + :returns: Model converted to InferenceData + :rtype: az.InferenceData + """ if log_likelihood is not None and log_likelihood not in dims: raise KeyError("Must include dimensions for log-likelihood!") if posterior_predictive is not None and posterior_predictive not in dims: @@ -120,15 +149,33 @@ def concatenate_inferences( return az.concat(*all_group_inferences) -# TODO: Fix docstring def stan_var_to_da( data: np.ndarray, coords: dict, dims: dict, chains: int, draws: int -): - """Convert Stan variable draws to xr.DataArray.""" +) -> xr.DataArray: + """Convert results of stan_var to DataArray. + + :params data: Result of stan_var + :type data: np.ndarray + + :params coords: Coordinates of variables + :type coords: dict + + :params dims: Dimensions of variables + :type dims: dict + + :params chains: Number of chains + :type chains: int + + :params draws: Number of draws + :type draws: int + + :returns: DataArray representation of stan variables + :rtype: xr.DataArray + """ data = np.stack(np.split(data, chains)) coords["draw"] = np.arange(draws) diff --git a/environment.yml b/environment.yml index 72d8bd7..c4b0e5d 100644 --- a/environment.yml +++ b/environment.yml @@ -6,7 +6,7 @@ channels: dependencies: - pandas - numpy - - python=3.7 + - python=3.8 - python-language-server - xarray - patsy @@ -16,4 +16,3 @@ dependencies: - docutils==0.16 - pip: - sphinx-rtd-theme==0.5.1 -prefix: /Users/gibs/miniconda3/envs/birdman