Skip to content

Commit

Permalink
Merge pull request #53 from gibsramen/update-inf-transformation
Browse files Browse the repository at this point in the history
Update inference conversion to be transform agnostic
  • Loading branch information
gibsramen authored Jul 12, 2021
2 parents bb2fb6a + 2c1c1ed commit 1542bcb
Show file tree
Hide file tree
Showing 16 changed files with 227 additions and 142 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

**B**ayesian **I**nferential **R**egression for **D**ifferential **M**icrobiome **An**alysis (**BIRDMAn**) is a framework for performing differential abundance analysis through Bayesian inference.

See the [documentation](https://birdman.readthedocs.io/en/stable/?badge=stable) for details on usage.

## Installation

Currently BIRDMAn requires Python 3.7 or higher.
Expand All @@ -19,5 +21,3 @@ pip install birdman
```

You have to install `cmdstan` through the `cmdstanpy.install_cmdstan` function first. See the [CmdStanPy documentation](https://mc-stan.org/cmdstanpy/installation.html#install-cmdstan) for details.

See the [documentation](https://birdman.readthedocs.io/en/stable/?badge=stable) for details on usage.
15 changes: 7 additions & 8 deletions birdman/default_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,23 +103,22 @@ def __init__(
self.specify_model(
params=["beta", "phi"],
dims={
"beta": ["covariate", "feature"],
"beta": ["covariate", "feature_alr"],
"phi": ["feature"],
"log_lhood": ["tbl_sample", "feature"],
"y_predict": ["tbl_sample", "feature"]
},
coords={
"covariate": self.colnames,
"feature": self.feature_names,
"feature_alr": self.feature_names[1:],
"tbl_sample": self.sample_names
},
include_observed_data=True,
posterior_predictive="y_predict",
log_likelihood="log_lhood"
)

self.specifications["alr_params"] = ["beta"]


class NegativeBinomialSingle(SingleFeatureModel):
"""Fit count data using negative binomial model on single feature.
Expand Down Expand Up @@ -330,15 +329,16 @@ def __init__(
self.specify_model(
params=["beta", "phi", "subj_int"],
dims={
"beta": ["covariate", "feature"],
"beta": ["covariate", "feature_alr"],
"phi": ["feature"],
"subj_int": ["group", "feature"],
"subj_int": ["group", "feature_alr"],
"log_lhood": ["tbl_sample", "feature"],
"y_predict": ["tbl_sample", "feature"]
},
coords={
"covariate": self.colnames,
"feature": self.feature_names,
"feature_alr": self.feature_names[1:],
"tbl_sample": self.sample_names,
"group": self.groups
},
Expand All @@ -347,8 +347,6 @@ def __init__(
log_likelihood="log_lhood"
)

self.specifications["alr_params"] = ["beta", "subj_int"]


class Multinomial(TableModel):
"""Fit count data using serial multinomial model.
Expand Down Expand Up @@ -422,13 +420,14 @@ def __init__(
self.specify_model(
params=["beta"],
dims={
"beta": ["covariate", "feature"],
"beta": ["covariate", "feature_alr"],
"log_lhood": ["tbl_sample"],
"y_predict": ["tbl_sample", "feature"]
},
coords={
"covariate": self.colnames,
"feature": self.feature_names,
"feature_alr": self.feature_names[1:],
"tbl_sample": self.sample_names,
},
include_observed_data=True,
Expand Down
43 changes: 1 addition & 42 deletions birdman/model_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@

import arviz as az
from cmdstanpy import CmdStanMCMC
import numpy as np
import xarray as xr

from .util import convert_beta_coordinates


def full_fit_to_inference(
fit: CmdStanMCMC,
Expand Down Expand Up @@ -46,9 +43,6 @@ def full_fit_to_inference(
:returns: ``arviz`` InferenceData object with selected values
:rtype: az.InferenceData
"""
# remove alr params so initial dim fitting works
new_dims = {k: v for k, v in dims.items() if k not in alr_params}

if log_likelihood is not None and log_likelihood not in dims:
raise KeyError("Must include dimensions for log-likelihood!")
if posterior_predictive is not None and posterior_predictive not in dims:
Expand All @@ -59,47 +53,12 @@ def full_fit_to_inference(
coords=coords,
log_likelihood=log_likelihood,
posterior_predictive=posterior_predictive,
dims=new_dims
dims=dims
)

vars_to_drop = set(inference.posterior.data_vars).difference(params)
inference.posterior = _drop_data(inference.posterior, vars_to_drop)

# Convert each param in ALR coordinates to CLR coordinates
for param in alr_params:
# Want to run on each chain independently
all_chain_clr_coords = []
all_chain_alr_coords = np.split(fit.stan_variable(param), fit.chains,
axis=0)
for i, chain_alr_coords in enumerate(all_chain_alr_coords):
# arviz 0.11.2 seems to flatten for some reason even though
# the PR was specifically supposed to do the opposite.
# Not sure what's going on but just going to go through cmdstanpy.
chain_clr_coords = convert_beta_coordinates(chain_alr_coords)
all_chain_clr_coords.append(chain_clr_coords)
all_chain_clr_coords = np.array(all_chain_clr_coords)

tmp_dims = ["chain", "draw"] + dims[param]
mcmc_coords = {
"chain": np.arange(fit.chains),
"draw": np.arange(fit.num_draws_sampling)
}
# restrict param DataArray to only required dims/coords
tmp_coords = {k: coords[k] for k in dims[param]}
param_da = xr.DataArray(
all_chain_clr_coords,
dims=tmp_dims,
coords={**tmp_coords, **mcmc_coords}
)
inference.posterior[param] = param_da

# TODO: Clean this up
all_dims = list(inference.posterior.dims)
dims_to_drop = []
for dim in all_dims:
if re.match(f"{param}_dim_\\d", dim):
dims_to_drop.append(dim)
inference.posterior = inference.posterior.drop_dims(dims_to_drop)
return inference


Expand Down
4 changes: 2 additions & 2 deletions birdman/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from scipy.stats import f as f_distrib

from .util import clr_to_alr
from .transform import _clr_to_alr


def hotelling_ttest(
Expand Down Expand Up @@ -45,7 +45,7 @@ def _hotelling(x: np.ndarray) -> Tuple[np.float64, np.float64]:
:returns: :math:`t^2` & p-value
:rtype: Tuple(float, float)
"""
x_alr = clr_to_alr(x) # Can't use CLR b/c covariance matrix is singular
x_alr = _clr_to_alr(x) # Can't use CLR b/c covariance matrix is singular
num_feats, num_draws = x_alr.shape

if num_feats > num_draws:
Expand Down
111 changes: 111 additions & 0 deletions birdman/transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from typing import Sequence

import numpy as np
import xarray as xr


def posterior_alr_to_clr(
posterior: xr.Dataset,
alr_params: list,
dim_replacement: dict,
new_labels: Sequence
) -> xr.Dataset:
"""Convert posterior draws from ALR to CLR.
:param posterior: Posterior draws for fitted parameters
:type posterior: xr.Dataset
:param alr_params: List of parameters to convert from ALR to CLR
:type alr_params: list
:param dim_replacement: Dictionary of updated posterior dimension names
e.g. {"feature_alr": "feature"}
:type dim_replacement: dict
:param new_labels: Coordinates to assign to CLR posterior draws
:type new_labels: Sequence
"""
new_posterior = posterior.copy()
for param in alr_params:
param_da = posterior[param]
all_chain_alr_coords = param_da
all_chain_clr_coords = []

for i, chain_alr_coords in all_chain_alr_coords.groupby("chain"):
chain_clr_coords = _beta_alr_to_clr(chain_alr_coords)
all_chain_clr_coords.append(chain_clr_coords)

all_chain_clr_coords = np.array(all_chain_clr_coords)

new_dims = [
dim_replacement[x]
if x in dim_replacement else x
for x in param_da.dims
]
# Replace coords with updated labels

new_coords = dict()
for dim in param_da.dims:
if dim in dim_replacement:
new_name = dim_replacement[dim]
new_coords[new_name] = new_labels
else:
new_coords[dim] = param_da.coords.get(dim).data

new_param_da = xr.DataArray(
all_chain_clr_coords,
dims=new_dims,
coords=new_coords
)
new_posterior[param] = new_param_da

new_posterior = new_posterior.drop_vars(dim_replacement.keys())
return new_posterior


def _alr_to_clr(x: np.ndarray) -> np.ndarray:
"""Convert ALR coordinates to centered CLR coordinates.
:param x: Matrix of ALR coordinates (features x draws)
:type x: np.ndarray
:returns: Matrix of centered CLR coordinates
:rtype: np.ndarray
"""
num_draws = x.shape[1]
z = np.zeros((1, num_draws))
x_clr = np.vstack((z, x))
x_clr = x_clr - x_clr.mean(axis=0).reshape(1, -1)
return x_clr


def _clr_to_alr(x: np.ndarray) -> np.ndarray:
"""Convert CLR coordinates to ALR coordinates.
:param x: Matrix of centered CLR coordinates (features x draws)
:type x: np.ndarray
:returns: Matrix of ALR coordinates
:rtype: np.ndarray
"""
ref = x[0, :] # first feature as reference
return (x - ref)[1:, :]


def _beta_alr_to_clr(beta: np.ndarray) -> np.ndarray:
"""Convert feature-covariate coefficients from ALR to CLR.
:param beta: Matrix of beta ALR coordinates (n draws x p covariates x
d features)
:type beta: np.ndarray
:returns: Matrix of beta CLR coordinates (n draws x p covariates x d+1
features)
:rtype: np.ndarray
"""
num_draws, num_covariates, num_features = beta.shape
beta_clr = np.zeros((num_draws, num_covariates, num_features+1))
for i in range(num_covariates): # TODO: vectorize
beta_slice = beta[:, i, :].T # features x draws
beta_clr[:, i, :] = _alr_to_clr(beta_slice).T
return beta_clr
49 changes: 0 additions & 49 deletions birdman/util.py

This file was deleted.

Loading

0 comments on commit 1542bcb

Please sign in to comment.