Skip to content

Commit

Permalink
Merge pull request #134 from mortonjt/v3
Browse files Browse the repository at this point in the history
Updating to version 0.3
  • Loading branch information
mortonjt authored Mar 6, 2017
2 parents 8fab644 + 13d42ca commit 88c56ef
Show file tree
Hide file tree
Showing 21 changed files with 132 additions and 1,594 deletions.
2 changes: 1 addition & 1 deletion ci/conda_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pip
nose
pep8
flake8
IPython<4.0.0
IPython>4.0.0
notebook
scikit-bio=0.5.1
pyqt=4.11.4
Expand Down
1 change: 1 addition & 0 deletions ci/qiime_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ git+https://github.com/qiime2/q2-types.git
git+https://github.com/qiime2/q2templates.git
git+https://github.com/qiime2/q2-composition.git
git+https://github.com/qiime2/q2cli.git
git+https://github.com/qiime2/q2-feature-table.git
3 changes: 3 additions & 0 deletions doc/source/cluster.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.. automodule:: gneiss.cluster


2 changes: 2 additions & 0 deletions doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Contents:

balances
regression
cluster
plot
sort
util

Expand Down
3 changes: 3 additions & 0 deletions doc/source/plot.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.. automodule:: gneiss.plot


2 changes: 1 addition & 1 deletion gneiss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

__version__ = "0.2.0"
__version__ = "0.3.0"
39 changes: 34 additions & 5 deletions gneiss/cluster/_pba.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


def proportional_linkage(X, method='ward'):
"""
r"""
Principal Balance Analysis using Hierarchical Clustering
based on proportionality.
Expand All @@ -24,7 +24,7 @@ def proportional_linkage(X, method='ward'):
two features :math:`x` and :math:`y` is measured by
.. math::
p(x, y) = var \ln \frac{x}{y}
p(x, y) = var (\ln \frac{x}{y})
If :math:`p(x, y)` is very small, then :math:`x` and :math:`y`
are said to be highly proportional. A hierarchical clustering is
Expand All @@ -43,18 +43,32 @@ def proportional_linkage(X, method='ward'):
skbio.TreeNode
Tree generated from principal balance analysis.
Refererences
------------
References
----------
.. [1] Pawlowsky-Glahn V, Egozcue JJ, and Tolosana-Delgado R.
Principal Balances (2011).
Examples
--------
>>> import pandas as pd
>>> from gneiss.cluster import proportional_linkage
>>> table = pd.DataFrame([[1, 1, 0, 0, 0],
... [0, 1, 1, 0, 0],
... [0, 0, 1, 1, 0],
... [0, 0, 0, 1, 1]],
... columns=['s1', 's2', 's3', 's4', 's5'],
... index=['o1', 'o2', 'o3', 'o4']).T
>>> tree = proportional_linkage(table+0.1)
"""
dm = variation_matrix(X)
lm = linkage(dm.condensed_form(), method=method)
return TreeNode.from_linkage_matrix(lm, X.columns)


def gradient_linkage(X, y, method='average'):
"""
r"""
Principal Balance Analysis using Hierarchical Clustering
on known gradient.
Expand Down Expand Up @@ -98,6 +112,21 @@ def gradient_linkage(X, y, method='average'):
See Also
--------
mean_niche_estimator
Examples
--------
>>> import pandas as pd
>>> from gneiss.cluster import gradient_linkage
>>> table = pd.DataFrame([[1, 1, 0, 0, 0],
... [0, 1, 1, 0, 0],
... [0, 0, 1, 1, 0],
... [0, 0, 0, 1, 1]],
... columns=['s1', 's2', 's3', 's4', 's5'],
... index=['o1', 'o2', 'o3', 'o4']).T
>>> gradient = pd.Series([1, 2, 3, 4, 5],
... index=['s1', 's2', 's3', 's4', 's5'])
>>> tree = gradient_linkage(table, gradient)
"""
_X, _y = match(X, y)
mean_X = mean_niche_estimator(_X, gradient=_y)
Expand Down
1 change: 1 addition & 0 deletions gneiss/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
:toctree: generated/
heatmap
radialplot
diamondtree
"""
# ----------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions gneiss/plot/_radial.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def radialplot(tree, node_color='node_color', node_size='node_size',
bokeh.models.Plot
Interactive plotting instance.
Notes
-----
This assumes that the tree is strictly bifurcating.
Expand Down
47 changes: 23 additions & 24 deletions gneiss/regression/_mixedlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,33 +159,32 @@ def mixedlm(formula, table, metadata, tree, groups, **kwargs):


class LMEModel(RegressionModel):
def __init__(self, *args, **kwargs):
"""
Summary object for storing linear mixed effects results.
""" Summary object for storing linear mixed effects results.
A `LMEModel` object stores information about the
individual balances used in the regression, the coefficients,
residuals. This object can be used to perform predictions.
In addition, summary statistics such as the coefficient
of determination for the overall fit can be calculated.
A `LMEModel` object stores information about the
individual balances used in the regression, the coefficients,
residuals. This object can be used to perform predictions.
In addition, summary statistics such as the coefficient
of determination for the overall fit can be calculated.
Parameters
----------
submodels : list of statsmodels objects
List of statsmodels result objects.
basis : pd.DataFrame
Orthonormal basis in the Aitchison simplex.
Row names correspond to the leafs of the tree
and the column names correspond to the internal nodes
in the tree.
tree : skbio.TreeNode
Bifurcating tree that defines `basis`.
balances : pd.DataFrame
A table of balances where samples are rows and
balances are columns. These balances were calculated
using `tree`.
"""
Attributes
----------
submodels : list of statsmodels objects
List of statsmodels result objects.
basis : pd.DataFrame
Orthonormal basis in the Aitchison simplex.
Row names correspond to the leafs of the tree
and the column names correspond to the internal nodes
in the tree.
tree : skbio.TreeNode
Bifurcating tree that defines `basis`.
balances : pd.DataFrame
A table of balances where samples are rows and
balances are columns. These balances were calculated
using `tree`.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def fit(self, **kwargs):
Expand Down
49 changes: 24 additions & 25 deletions gneiss/regression/_ols.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,34 +179,33 @@ def ols(formula, table, metadata, tree, **kwargs):


class OLSModel(RegressionModel):
def __init__(self, *args, **kwargs):
"""
Summary object for storing ordinary least squares results.
""" Summary object for storing ordinary least squares results.
A `OLSModel` object stores information about the
individual balances used in the regression, the coefficients,
residuals. This object can be used to perform predictions.
In addition, summary statistics such as the coefficient
of determination for the overall fit can be calculated.
A `OLSModel` object stores information about the
individual balances used in the regression, the coefficients,
residuals. This object can be used to perform predictions.
In addition, summary statistics such as the coefficient
of determination for the overall fit can be calculated.
Parameters
----------
submodels : list of statsmodels objects
List of statsmodels result objects.
basis : pd.DataFrame
Orthonormal basis in the Aitchison simplex.
Row names correspond to the leaves of the tree
and the column names correspond to the internal nodes
in the tree. If this is not specified, then `project` cannot
be enabled in `coefficients` or `predict`.
tree : skbio.TreeNode
Bifurcating tree that defines `basis`.
balances : pd.DataFrame
A table of balances where samples are rows and
balances are columns. These balances were calculated
using `tree`.
"""
Attributes
----------
submodels : list of statsmodels objects
List of statsmodels result objects.
basis : pd.DataFrame
Orthonormal basis in the Aitchison simplex.
Row names correspond to the leaves of the tree
and the column names correspond to the internal nodes
in the tree. If this is not specified, then `project` cannot
be enabled in `coefficients` or `predict`.
tree : skbio.TreeNode
Bifurcating tree that defines `basis`.
balances : pd.DataFrame
A table of balances where samples are rows and
balances are columns. These balances were calculated
using `tree`.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def fit(self, regularized=False, **kwargs):
Expand Down
20 changes: 13 additions & 7 deletions gneiss/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
# ----------------------------------------------------------------------------

import unittest
import numpy as np
import pandas as pd
import pandas.util.testing as pdt
from skbio import TreeNode
from gneiss.util import (match, match_tips, rename_internal_nodes,
_type_cast_to_float)
_type_cast_to_float, random_tree)


class TestUtil(unittest.TestCase):
Expand Down Expand Up @@ -311,11 +312,6 @@ def test_rename_internal_nodes_names_mismatch(self):
with self.assertRaises(ValueError):
rename_internal_nodes(tree, ['r', 'abc'])

def test_rename_internal_nodes_warning(self):
tree = TreeNode.read([u"(((a,b)y2, c),d)r;"])
with self.assertWarns(Warning):
rename_internal_nodes(tree)

def test_rename_internal_nodes_immutable(self):
tree = TreeNode.read([u"(((a,b)y2, c),d)r;"])
rename_internal_nodes(tree)
Expand All @@ -336,9 +332,19 @@ def test_type_cast_to_float(self):
'b': [1., 2., 3., 4., 5.],
'c': ['a', 'b', 'c', 'd', 'e'],
'd': [1., 2., 3., 4., 5.]})

pdt.assert_frame_equal(res, exp)

def test_random_tree(self):
    """Check that `random_tree(10)` yields a fixed tree under seed 0."""
    # Seed NumPy's global RNG so the random draw inside `random_tree`
    # is the same on every run.
    np.random.seed(0)
    t = random_tree(10)
    # Expected string form of the tree (tips '0'..'9', internal nodes
    # 'y0'..'y8').
    # NOTE(review): this compares floating point branch lengths as text,
    # so it presumably depends on the float formatting of the installed
    # numpy/scipy/scikit-bio versions -- confirm before upgrading them.
    exp = ('((7:0.0359448798595,8:0.0359448798595)y1:0.312827608797,'
           '((9:0.0272390892166,(4:0.00696620596189,6:0.00696620596189)'
           'y5:0.0202728832547)y3:0.16313179006,((0:0.00196516046521,'
           '3:0.00196516046521)y6:0.0815110118351,(1:0.0524584044569,'
           '(2:0.0215653684975,5:0.0215653684975)y8:0.0308930359593)'
           'y7:0.0310177678435)y4:0.106894706976)y2:0.15840160938)y0;\n')
    self.assertEqual(str(t), exp)


if __name__ == '__main__':
unittest.main()
26 changes: 25 additions & 1 deletion gneiss/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import pandas as pd
from .balances import balance_basis
from skbio.stats.composition import ilr
from scipy.cluster.hierarchy import ward
from skbio import TreeNode, DistanceMatrix


def match(table, metadata):
Expand Down Expand Up @@ -181,7 +183,7 @@ def rename_internal_nodes(tree, names=None, inplace=False):
label = names[i]
if n.name is not None and label == n.name:
warnings.warn("Warning. Internal node (%s) has been replaced "
"with (%s)" % (n.name, label))
"with (%s)" % (n.name, label), UserWarning)

n.name = label
i += 1
Expand Down Expand Up @@ -293,3 +295,25 @@ def _type_cast_to_float(df):
except:
continue
return df


def random_tree(n):
    """ Generates a tree with random topology.

    The tree is obtained by drawing `n` random values with
    `np.random.rand`, computing their pairwise absolute differences and
    running Ward hierarchical clustering on the resulting distances.
    The draw uses NumPy's global random state, so call `np.random.seed`
    beforehand for reproducible output.

    Parameters
    ----------
    n : int
        Number of tips (leaves) in the tree.  The tips are named
        '0', '1', ..., str(n - 1).

    Returns
    -------
    skbio.TreeNode
        Random tree whose internal nodes have been relabeled by
        `rename_internal_nodes`.
    """
    # One random coordinate per tip; the clustering of these points
    # determines the topology.
    x = np.random.rand(n)
    dm = DistanceMatrix.from_iterable(x, lambda a, b: np.abs(a - b))
    lm = ward(dm.condensed_form())
    # `np.str` was only an alias of the builtin `str` and was removed in
    # NumPy 1.24; using `str` directly gives the same result on every
    # NumPy version.
    ids = np.arange(len(x)).astype(str)
    t = TreeNode.from_linkage_matrix(lm, ids)
    return rename_internal_nodes(t)
Loading

0 comments on commit 88c56ef

Please sign in to comment.