Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proportion plots #234

Merged
merged 19 commits into from
Oct 13, 2017
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ install:
- conda install --yes -n test_env cython
- source activate test_env
- pip install -r ci/pip_requirements.txt
- pip install -e '.[q2]'
- pip install -e .
script:
- WITH_COVERAGE=TRUE make all
- if [ ${MAKE_DOC} ]; then make -C doc clean html; fi
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# gneiss changelog

## Version 0.4.1
* Added `proportion_plot` to plot the mean proportions within a single balance [#234](https://github.com/biocore/gneiss/pull/234)
* Added colorbar for heatmap
* Decoupled qiime2 from gneiss. All qiime2 commands have now been ported to [q2-gneiss](https://github.com/qiime2/q2-gneiss)

Expand Down
5 changes: 3 additions & 2 deletions gneiss/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@

from ._heatmap import heatmap
from ._radial import radialplot
from ._decompose import balance_boxplot, balance_barplots
from ._decompose import balance_boxplot, balance_barplots, proportion_plot


__all__ = ["heatmap", "radialplot", "balance_boxplot", "balance_barplots"]
__all__ = ["heatmap", "radialplot", "balance_boxplot",
"balance_barplots", "proportion_plot"]
134 changes: 134 additions & 0 deletions gneiss/plot/_decompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,137 @@ def balance_barplots(tree, balance_name, header, feature_metadata,
ax_num.set_xlim([0, max([num_.max().values[1],
denom_.max().values[1]])])
return ax_num, ax_denom


def proportion_plot(table, metadata, category, left_group, right_group,
num_features, denom_features,
feature_metadata=None,
label_col='species',
num_color='#105d33', denom_color='#b0d78e',
axes=(None, None)):
""" Plot the mean proportions of features within a balance.

This plots the numerator and denominator components within a balance.

Parameters
----------
table : pd.DataFrame
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you concerned about requiring dense matrices for the table data?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope

Table of relative abundances.
metadata : pd.DataFrame
Samples metadata
spectrum : pd.Series
The component from partial least squares.
feature_metadata : pd.DataFrame
The metadata associated to the features.
category : str
Name of sample metadata category.
left_group : str
Name of group within sample metadata category to plot
on the left of the plot.
right_group : str
Name of group within sample metadata category to plot
on the right of the plot.
label_col : str
Column in the feature metadata table to summarize by.
num_color : str
Color to plot the numerator.
denom_color : str
Color to plot the denominator.

Returns
-------
ax_num : matplotlib.pyplot.Axes
Matplotlib axes for the numerator bars
ax_denom : matplotlib.pyplot.Axes
Matplotlib axes for the denominator bars
"""
import seaborn as sns
if axes[0] is None or axes[1] is None:
f, (ax_num, ax_denom) = plt.subplots(1, 2)
else:
ax_num, ax_denom = axes[0], axes[1]

fname = 'feature'
ptable = table.apply(lambda x: x / x.sum(), axis=1)
num_df = ptable[num_features]

denom_df = ptable[denom_features]

# merge together metadata and sequences
num_data_ = pd.merge(metadata, num_df,
left_index=True, right_index=True)
denom_data_ = pd.merge(metadata, denom_df,
left_index=True, right_index=True)

# merge the data frame, so that all of the proportions
# are in their own separate column
num_data = pd.melt(num_data_, id_vars=[category],
value_vars=list(num_df.columns),
value_name='proportion', var_name=fname)
num_data['part'] = 'numerator'
denom_data = pd.melt(denom_data_, id_vars=[category],
value_vars=list(denom_df.columns),
value_name='proportion', var_name=fname)
denom_data['part'] = 'denominator'
data = pd.concat((num_data, denom_data))
if feature_metadata is not None:
num_feature_metadata = feature_metadata.loc[num_df.columns,
label_col]
denom_feature_metadata = feature_metadata.loc[denom_df.columns,
label_col]
# order of the ids to plot
order = (list(num_feature_metadata.index) +
list(denom_feature_metadata.index))
else:
order = (list(num_df.columns) +
list(denom_df.columns))

less_df = data.loc[data[category] == left_group].dropna()

sns.barplot(x='proportion',
y=fname,
data=less_df,
color=denom_color,
order=order,
ax=ax_denom)
more_df = data.loc[data[category] == right_group].dropna()

sns.barplot(x='proportion',
y=fname,
data=more_df,
color=num_color,
order=order,
ax=ax_num)
if feature_metadata is not None:
ax_denom.set(yticklabels=(list(num_feature_metadata.values) +
list(denom_feature_metadata.values)),
title=left_group)
else:
ax_denom.set(yticklabels=order, title=left_group)

ax_num.set(yticklabels=[], ylabel='', yticks=[], title=right_group)

max_xlim = max(ax_denom.get_xlim()[1], ax_num.get_xlim()[1])
min_xlim = max(ax_denom.get_xlim()[0], ax_num.get_xlim()[0])

max_ylim, min_ylim = ax_denom.get_ylim()

ax_denom.set_xlim(max_xlim, min_xlim)
ax_num.set_xlim(min_xlim, max_xlim)
ax_denom.set_position([0.2, 0.125, 0.3, 0.75])
ax_num.set_position([0.5, 0.125, 0.3, 0.75])

num_h = num_df.shape[1]
denom_h = denom_df.shape[1]

space = (max_ylim - min_ylim) / (num_h + denom_h)
ymid = (max_ylim - min_ylim) * num_h / (num_h + denom_h) - 0.5 * space

ax_denom.axhspan(min_ylim, ymid, facecolor=num_color,
zorder=0, alpha=0.25)
ax_denom.axhspan(ymid, max_ylim, facecolor=denom_color,
zorder=0, alpha=0.25)

ax_num.axhspan(min_ylim, ymid, facecolor=num_color, zorder=0, alpha=0.25)
ax_num.axhspan(ymid, max_ylim, facecolor=denom_color, zorder=0, alpha=0.25)
return (ax_num, ax_denom)
117 changes: 116 additions & 1 deletion gneiss/plot/tests/test_decompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import unittest
from gneiss.plot import balance_boxplot, balance_barplots
from gneiss.plot import balance_boxplot, balance_barplots, proportion_plot
import numpy as np
import pandas as pd
import numpy.testing as npt
import matplotlib.pyplot as plt
from skbio import TreeNode


Expand Down Expand Up @@ -105,5 +106,119 @@ def test_basic_barplot(self):
feature_metadata=self.feature_df)


class TestProportionPlot(unittest.TestCase):
def setUp(self):
self.table = pd.DataFrame({
'A': [1, 1.2, 1.1, 2.1, 2.2, 2],
'B': [9.9, 10, 10.1, 2, 2.4, 2.1],
'C': [5, 3, 1, 2, 2, 3],
'D': [5, 5, 5, 5, 5, 5],
}, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])

self.feature_metadata = pd.DataFrame({
'A': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'],
'B': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'],
'C': ['k__poo', 'p__tar', 'c__', 'o__', 'f__', 'g__', 's__'],
'D': ['k__poo', 'p__far', 'c__', 'o__', 'f__', 'g__', 's__']
}, index=['kingdom', 'phylum', 'class', 'order',
'family', 'genus', 'species']).T

self.metadata = pd.DataFrame({
'groups': ['X', 'X', 'X', 'Y', 'Y', 'Y'],
'dry': [1, 2, 3, 4, 5, 6]
}, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6'])

def test_proportion_plot(self):
np.random.seed(0)
num_features = ['A', 'B']
denom_features = ['C', 'D']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
self.feature_metadata,
label_col='phylum')
res = np.vstack([l.get_xydata() for l in ax1.get_lines()])
exp=np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, verbose=True)

res = np.vstack([l.get_xydata() for l in ax2.get_lines()])
exp=np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, verbose=True)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['p__bar', 'p__bar', 'p__tar', 'p__far']
self.assertListEqual(res, exp)

def test_proportion_plot_order(self):
self.maxDiff = None
np.random.seed(0)
# tests for different ordering
num_features = ['A', 'B']
denom_features = ['D', 'C']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
self.feature_metadata,
label_col='phylum')
res = np.vstack([l.get_xydata() for l in ax1.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = np.vstack([l.get_xydata() for l in ax2.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['p__bar', 'p__bar', 'p__far', 'p__tar']
self.assertListEqual(res, exp)

def test_proportion_plot_order_figure(self):
self.maxDiff = None
np.random.seed(0)
# tests for different ordering
fig, axes = plt.subplots(1, 2)

num_features = ['A', 'B']
denom_features = ['D', 'C']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
self.feature_metadata,
label_col='phylum', axes=axes)
res = np.vstack([l.get_xydata() for l in ax1.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])

npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = np.vstack([l.get_xydata() for l in ax2.get_lines()])
exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.])


npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['p__bar', 'p__bar', 'p__far', 'p__tar']
self.assertListEqual(res, exp)

def test_proportion_plot_original_labels(self):
# tests for different ordering
fig, axes = plt.subplots(1, 2)

num_features = ['A', 'B']
denom_features = ['D', 'C']
ax1, ax2 = proportion_plot(self.table, self.metadata,
'groups', 'X', 'Y',
num_features, denom_features,
axes=axes)

res = [l._text for l in ax2.get_yticklabels()]
exp = ['A', 'B', 'D', 'C']
self.assertListEqual(res, exp)


if __name__ == '__main__':
unittest.main()
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def finalize_options(self):
'nose >= 1.3.7',
'scikit-bio==0.5.1',
'statsmodels>=0.8.0',
'biom-format',
'seaborn'
],
classifiers=classifiers,
Expand Down