diff --git a/.travis.yml b/.travis.yml index 5f0593a..2d82f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ install: - conda install --yes -n test_env cython - source activate test_env - pip install -r ci/pip_requirements.txt - - pip install -e '.[q2]' + - pip install -e . script: - WITH_COVERAGE=TRUE make all - if [ ${MAKE_DOC} ]; then make -C doc clean html; fi diff --git a/CHANGELOG.md b/CHANGELOG.md index 04b15e6..d0b2e5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # gneiss changelog ## Version 0.4.1 +* Added `proportion_plot` to plot the mean proportions within a single balance [#234](https://github.com/biocore/gneiss/pull/234) * Added colorbar for heatmap * Decoupled qiime2 from gneiss. All qiime2 commands have now been ported to [q2-gneiss](https://github.com/qiime2/q2-gneiss) diff --git a/gneiss/plot/__init__.py b/gneiss/plot/__init__.py index 145b09d..340b758 100644 --- a/gneiss/plot/__init__.py +++ b/gneiss/plot/__init__.py @@ -27,7 +27,8 @@ from ._heatmap import heatmap from ._radial import radialplot -from ._decompose import balance_boxplot, balance_barplots +from ._decompose import balance_boxplot, balance_barplots, proportion_plot -__all__ = ["heatmap", "radialplot", "balance_boxplot", "balance_barplots"] +__all__ = ["heatmap", "radialplot", "balance_boxplot", + "balance_barplots", "proportion_plot"] diff --git a/gneiss/plot/_decompose.py b/gneiss/plot/_decompose.py index a6443b7..96fe1c4 100644 --- a/gneiss/plot/_decompose.py +++ b/gneiss/plot/_decompose.py @@ -155,3 +155,173 @@ def balance_barplots(tree, balance_name, header, feature_metadata, ax_num.set_xlim([0, max([num_.max().values[1], denom_.max().values[1]])]) return ax_num, ax_denom + + +def proportion_plot(table, metadata, category, left_group, right_group, + num_features, denom_features, + feature_metadata=None, + label_col='species', + num_color='#105d33', denom_color='#b0d78e', + axes=(None, None)): + """ Plot the mean proportions of features within a balance. + + This plots the numerator and denominator components within a balance. + + Parameters + ---------- + table : pd.DataFrame + Table of relative abundances. + metadata : pd.DataFrame + Samples metadata + spectrum : pd.Series + The component from partial least squares. + feature_metadata : pd.DataFrame + The metadata associated to the features. + category : str + Name of sample metadata category. + left_group : str + Name of group within sample metadata category to plot + on the left of the plot. + right_group : str + Name of group within sample metadata category to plot + on the right of the plot. + label_col : str + Column in the feature metadata table to summarize by. + num_color : str + Color to plot the numerator. + denom_color : str + Color to plot the denominator. + + Returns + ------- + ax_num : matplotlib.pyplot.Axes + Matplotlib axes for the numerator bars + ax_denom : matplotlib.pyplot.Axes + Matplotlib axes for the denominator bars + + Examples + -------- + First we'll want to set up the main objects to pass into the plot. + For starters, we'll pass in the feature table, metadata and + feature_metadata. + + >>> table = pd.DataFrame({ + ... 'A': [1, 1.2, 1.1, 2.1, 2.2, 2], + ... 'B': [9.9, 10, 10.1, 2, 2.4, 2.1], + ... 'C': [5, 3, 1, 2, 2, 3], + ... 'D': [5, 5, 5, 5, 5, 5], + ... }, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6']) + + >>> feature_metadata = pd.DataFrame({ + ... 'A': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'], + ... 'B': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'], + ... 'C': ['k__poo', 'p__tar', 'c__', 'o__', 'f__', 'g__', 's__'], + ... 'D': ['k__poo', 'p__far', 'c__', 'o__', 'f__', 'g__', 's__'] + ... }, index=['kingdom', 'phylum', 'class', 'order', 'family', + ... 'genus', 'species']).T + + >>> metadata = pd.DataFrame({ + ... 'groups': ['X', 'X', 'X', 'Y', 'Y', 'Y'], + ... 'dry': [1, 2, 3, 4, 5, 6]}, + ... index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6']) + + Then we can specify which specific features to visualize and plot. + >>> num_features = ['A', 'B'] + >>> denom_features = ['C', 'D'] + >>> ax1, ax2 = proportion_plot(table, metadata, 'groups', 'X', 'Y', + ... num_features, denom_features, + ... feature_metadata, label_col='phylum') + + Since this method will return the raw matplotlib object, labels, titles, + ticks, etc can directly modified using this object. + """ + import seaborn as sns + if axes[0] is None or axes[1] is None: + f, (ax_num, ax_denom) = plt.subplots(1, 2) + else: + ax_num, ax_denom = axes[0], axes[1] + + fname = 'feature' + ptable = table.apply(lambda x: x / x.sum(), axis=1) + num_df = ptable[num_features] + + denom_df = ptable[denom_features] + + # merge together metadata and sequences + num_data_ = pd.merge(metadata, num_df, + left_index=True, right_index=True) + denom_data_ = pd.merge(metadata, denom_df, + left_index=True, right_index=True) + + # merge the data frame, so that all of the proportions + # are in their own separate column + num_data = pd.melt(num_data_, id_vars=[category], + value_vars=list(num_df.columns), + value_name='proportion', var_name=fname) + num_data['part'] = 'numerator' + denom_data = pd.melt(denom_data_, id_vars=[category], + value_vars=list(denom_df.columns), + value_name='proportion', var_name=fname) + denom_data['part'] = 'denominator' + data = pd.concat((num_data, denom_data)) + if feature_metadata is not None: + num_feature_metadata = feature_metadata.loc[num_df.columns, + label_col] + denom_feature_metadata = feature_metadata.loc[denom_df.columns, + label_col] + # order of the ids to plot + order = (list(num_feature_metadata.index) + + list(denom_feature_metadata.index)) + else: + order = (list(num_df.columns) + + list(denom_df.columns)) + + less_df = data.loc[data[category] == left_group].dropna() + + sns.barplot(x='proportion', + y=fname, + data=less_df, + color=denom_color, + order=order, + ax=ax_denom) + more_df = data.loc[data[category] == right_group].dropna() + + sns.barplot(x='proportion', + y=fname, + data=more_df, + color=num_color, + order=order, + ax=ax_num) + if feature_metadata is not None: + ax_denom.set(yticklabels=(list(num_feature_metadata.values) + + list(denom_feature_metadata.values)), + title=left_group) + else: + ax_denom.set(yticklabels=order, title=left_group) + + ax_num.set(yticklabels=[], ylabel='', yticks=[], title=right_group) + + max_xlim = max(ax_denom.get_xlim()[1], ax_num.get_xlim()[1]) + min_xlim = max(ax_denom.get_xlim()[0], ax_num.get_xlim()[0]) + + max_ylim, min_ylim = ax_denom.get_ylim() + + ax_denom.set_xlim(max_xlim, min_xlim) + ax_num.set_xlim(min_xlim, max_xlim) + ax_denom.set_position([0.2, 0.125, 0.3, 0.75]) + ax_num.set_position([0.5, 0.125, 0.3, 0.75]) + + num_h = num_df.shape[1] + denom_h = denom_df.shape[1] + + space = (max_ylim - min_ylim) / (num_h + denom_h) + ymid = (max_ylim - min_ylim) * num_h / (num_h + denom_h) - 0.5 * space + + ax_denom.axhspan(min_ylim, ymid, facecolor=num_color, + zorder=0, alpha=0.25) + ax_denom.axhspan(ymid, max_ylim, facecolor=denom_color, + zorder=0, alpha=0.25) + + ax_num.axhspan(min_ylim, ymid, facecolor=num_color, zorder=0, alpha=0.25) + ax_num.axhspan(ymid, max_ylim, facecolor=denom_color, zorder=0, alpha=0.25) + return (ax_num, ax_denom) diff --git a/gneiss/plot/tests/test_decompose.py b/gneiss/plot/tests/test_decompose.py index bef7335..19baea6 100644 --- a/gneiss/plot/tests/test_decompose.py +++ b/gneiss/plot/tests/test_decompose.py @@ -6,10 +6,11 @@ # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest -from gneiss.plot import balance_boxplot, balance_barplots +from gneiss.plot import balance_boxplot, balance_barplots, proportion_plot import numpy as np import pandas as pd import numpy.testing as npt +import matplotlib.pyplot as plt from skbio import TreeNode @@ -105,5 +106,118 @@ def test_basic_barplot(self): feature_metadata=self.feature_df) +class TestProportionPlot(unittest.TestCase): + def setUp(self): + self.table = pd.DataFrame({ + 'A': [1, 1.2, 1.1, 2.1, 2.2, 2], + 'B': [9.9, 10, 10.1, 2, 2.4, 2.1], + 'C': [5, 3, 1, 2, 2, 3], + 'D': [5, 5, 5, 5, 5, 5], + }, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6']) + + self.feature_metadata = pd.DataFrame({ + 'A': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'], + 'B': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'], + 'C': ['k__poo', 'p__tar', 'c__', 'o__', 'f__', 'g__', 's__'], + 'D': ['k__poo', 'p__far', 'c__', 'o__', 'f__', 'g__', 's__'] + }, index=['kingdom', 'phylum', 'class', 'order', + 'family', 'genus', 'species']).T + + self.metadata = pd.DataFrame({ + 'groups': ['X', 'X', 'X', 'Y', 'Y', 'Y'], + 'dry': [1, 2, 3, 4, 5, 6] + }, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6']) + + def test_proportion_plot(self): + np.random.seed(0) + num_features = ['A', 'B'] + denom_features = ['C', 'D'] + ax1, ax2 = proportion_plot(self.table, self.metadata, + 'groups', 'X', 'Y', + num_features, denom_features, + self.feature_metadata, + label_col='phylum') + res = np.vstack([l.get_xydata() for l in ax1.get_lines()]) + exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) + + npt.assert_allclose(res[:, 1], exp, verbose=True) + + res = np.vstack([l.get_xydata() for l in ax2.get_lines()]) + exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) + + npt.assert_allclose(res[:, 1], exp, verbose=True) + + res = [l._text for l in ax2.get_yticklabels()] + exp = ['p__bar', 'p__bar', 'p__tar', 'p__far'] + self.assertListEqual(res, exp) + + def test_proportion_plot_order(self): + self.maxDiff = None + np.random.seed(0) + # tests for different ordering + num_features = ['A', 'B'] + denom_features = ['D', 'C'] + ax1, ax2 = proportion_plot(self.table, self.metadata, + 'groups', 'X', 'Y', + num_features, denom_features, + self.feature_metadata, + label_col='phylum') + res = np.vstack([l.get_xydata() for l in ax1.get_lines()]) + exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) + + npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) + + res = np.vstack([l.get_xydata() for l in ax2.get_lines()]) + exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) + + npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) + + res = [l._text for l in ax2.get_yticklabels()] + exp = ['p__bar', 'p__bar', 'p__far', 'p__tar'] + self.assertListEqual(res, exp) + + def test_proportion_plot_order_figure(self): + self.maxDiff = None + np.random.seed(0) + # tests for different ordering + fig, axes = plt.subplots(1, 2) + + num_features = ['A', 'B'] + denom_features = ['D', 'C'] + ax1, ax2 = proportion_plot(self.table, self.metadata, + 'groups', 'X', 'Y', + num_features, denom_features, + self.feature_metadata, + label_col='phylum', axes=axes) + res = np.vstack([l.get_xydata() for l in ax1.get_lines()]) + exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) + + npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) + + res = np.vstack([l.get_xydata() for l in ax2.get_lines()]) + exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) + + npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) + + res = [l._text for l in ax2.get_yticklabels()] + exp = ['p__bar', 'p__bar', 'p__far', 'p__tar'] + self.assertListEqual(res, exp) + + def test_proportion_plot_original_labels(self): + # tests for different ordering + fig, axes = plt.subplots(1, 2) + + num_features = ['A', 'B'] + denom_features = ['D', 'C'] + ax1, ax2 = proportion_plot(self.table, self.metadata, + 'groups', 'X', 'Y', + num_features, denom_features, + axes=axes) + + res = [l._text for l in ax2.get_yticklabels()] + exp = ['A', 'B', 'D', 'C'] + self.assertListEqual(res, exp) + + if __name__ == '__main__': unittest.main() diff --git a/gneiss/plot/tests/test_radial.py b/gneiss/plot/tests/test_radial.py index b46dc20..1825a95 100644 --- a/gneiss/plot/tests/test_radial.py +++ b/gneiss/plot/tests/test_radial.py @@ -6,7 +6,6 @@ from skbio import TreeNode, DistanceMatrix from gneiss.plot._radial import radialplot from gneiss.plot._dendrogram import UnrootedDendrogram -import pandas.util.testing as pdt class TestRadial(unittest.TestCase): @@ -84,9 +83,9 @@ def test_basic_plot(self): node_size='node_size', edge_width='edge_width') for e in exp_edges.keys(): - pdt.assert_series_equal( - p.renderers[0].data_source.data[e], - pd.Series(exp_edges[e], name=e)) + self.assertListEqual( + list(p.renderers[0].data_source.data[e]), + exp_edges[e]) for e in exp_nodes.keys(): self.assertListEqual( diff --git a/setup.py b/setup.py index 15d19b1..e31cc3c 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,6 @@ def finalize_options(self): 'nose >= 1.3.7', 'scikit-bio==0.5.1', 'statsmodels>=0.8.0', - 'biom-format', 'seaborn' ], classifiers=classifiers,