Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Util #12

Merged
merged 13 commits into from
Jul 26, 2016
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Version 0.0.2 (changes since 0.0.2 go here)

### Features
* Adding in utility functions for handling feature tables, metadata, and trees. [#12](https://github.com/biocore/gneiss/pull/12)
* Adding GPL license.

### Bug fixes
322 changes: 322 additions & 0 deletions gneiss/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2016--, gneiss development team.
#
# Distributed under the terms of the GPLv3 License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

import unittest
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized that none of these files have the copyright notice at the top. Should it be added?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

import pandas as pd
import pandas.util.testing as pdt
from skbio import TreeNode
from gneiss.util import match, match_tips, rename_internal_nodes


class TestUtil(unittest.TestCase):
    """Tests for ``match``, ``match_tips`` and ``rename_internal_nodes``."""

    @staticmethod
    def _table(rows, columns, index=None):
        """Build a table fixture from ``rows``.

        ``index`` defaults to the four sample ids ``s1`` .. ``s4`` used by
        most fixtures in this class.
        """
        if index is None:
            index = ['s1', 's2', 's3', 's4']
        return pd.DataFrame(rows, index=index, columns=columns)

    @staticmethod
    def _metadata(rows, index):
        """Build a metadata fixture with Barcode / Treatment columns."""
        return pd.DataFrame(rows, index=index,
                            columns=['Barcode', 'Treatment'])

    def test_match(self):
        # Indexes already agree, so both frames are expected back as given.
        counts = self._table([[0, 0, 1, 1],
                              [2, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['o1', 'o2', 'o3', 'o4'])
        mapping = self._metadata([['a', 'control'],
                                  ['b', 'control'],
                                  ['c', 'diseased'],
                                  ['d', 'diseased']],
                                 index=['s1', 's2', 's3', 's4'])

        got_counts, got_mapping = match(counts, mapping)

        pdt.assert_frame_equal(counts, got_counts)
        pdt.assert_frame_equal(mapping, got_mapping)

    def test_match_immutable(self):
        # The frames handed to match() must be left untouched.
        otus = ['o1', 'o2', 'o3', 'o4']
        counts = self._table([[0, 0, 1, 1],
                              [2, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=otus)
        mapping = self._metadata([['a', 'control'],
                                  ['c', 'diseased'],
                                  ['b', 'control']],
                                 index=['s1', 's3', 's2'])

        # Independent copies of the inputs, built from scratch.
        untouched_counts = self._table([[0, 0, 1, 1],
                                        [2, 2, 4, 4],
                                        [5, 5, 3, 3],
                                        [0, 0, 0, 1]],
                                       columns=otus)
        untouched_mapping = self._metadata([['a', 'control'],
                                            ['c', 'diseased'],
                                            ['b', 'control']],
                                           index=['s1', 's3', 's2'])

        match(counts, mapping, intersect=True)

        pdt.assert_frame_equal(counts, untouched_counts)
        pdt.assert_frame_equal(mapping, untouched_mapping)

    def test_match_duplicate(self):
        # A repeated sample id in either input is expected to be rejected.
        otus = ['o1', 'o2', 'o3', 'o4']
        rows = [[0, 0, 1, 1],
                [2, 2, 4, 4],
                [5, 5, 3, 3],
                [0, 0, 0, 1]]
        groups = [['a', 'control'],
                  ['b', 'control'],
                  ['c', 'diseased'],
                  ['d', 'diseased']]

        # Case 1: duplicated id ('s2') in the table index.
        dup_counts = self._table(rows, columns=otus,
                                 index=['s2', 's2', 's3', 's4'])
        ok_mapping = self._metadata(groups,
                                    index=['s1', 's2', 's3', 's4'])

        # Case 2: duplicated id ('s1') in the metadata index.
        ok_counts = self._table(rows, columns=otus,
                                index=['s1', 's2', 's3', 's4'])
        dup_mapping = self._metadata(groups,
                                     index=['s1', 's1', 's3', 's4'])

        with self.assertRaises(ValueError):
            match(dup_counts, ok_mapping)
        with self.assertRaises(ValueError):
            match(ok_counts, dup_mapping)

    def test_match_scrambled(self):
        # Metadata rows arrive in a different order than the table rows.
        counts = self._table([[0, 0, 1, 1],
                              [2, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['o1', 'o2', 'o3', 'o4'])
        shuffled = self._metadata([['a', 'control'],
                                   ['c', 'diseased'],
                                   ['b', 'control'],
                                   ['d', 'diseased']],
                                  index=['s1', 's3', 's2', 's4'])
        ordered = self._metadata([['a', 'control'],
                                  ['b', 'control'],
                                  ['c', 'diseased'],
                                  ['d', 'diseased']],
                                 index=['s1', 's2', 's3', 's4'])

        got_counts, got_mapping = match(counts, shuffled)

        pdt.assert_frame_equal(counts, got_counts)
        pdt.assert_frame_equal(ordered, got_mapping)

    def test_match_intersect(self):
        # Metadata lacks 's4'; with intersect=True only the shared samples
        # are expected in the results.
        otus = ['o1', 'o2', 'o3', 'o4']
        counts = self._table([[0, 0, 1, 1],
                              [2, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=otus)
        mapping = self._metadata([['a', 'control'],
                                  ['c', 'diseased'],
                                  ['b', 'control']],
                                 index=['s1', 's3', 's2'])

        shared = ['s1', 's2', 's3']
        want_counts = self._table([[0, 0, 1, 1],
                                   [2, 2, 4, 4],
                                   [5, 5, 3, 3]],
                                  columns=otus, index=shared)
        want_mapping = self._metadata([['a', 'control'],
                                       ['b', 'control'],
                                       ['c', 'diseased']],
                                      index=shared)

        got_counts, got_mapping = match(counts, mapping, intersect=True)

        pdt.assert_frame_equal(want_counts, got_counts)
        pdt.assert_frame_equal(want_mapping, got_mapping)

    def test_match_mismatch(self):
        # Without intersect, differing sample sets are expected to raise.
        counts = self._table([[0, 0, 1, 1],
                              [2, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['o1', 'o2', 'o3', 'o4'])
        mapping = self._metadata([['a', 'control'],
                                  ['c', 'diseased'],
                                  ['b', 'control']],
                                 index=['s1', 's3', 's2'])

        with self.assertRaises(ValueError):
            match(counts, mapping)

    def test_match_tips(self):
        # Columns and tips already agree, so inputs are expected back as is.
        counts = self._table([[0, 0, 1, 1],
                              [2, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['a', 'b', 'c', 'd'])
        phylo = TreeNode.read([u"(((a,b)f, c),d)r;"])

        got_counts, got_phylo = match_tips(counts, phylo)

        pdt.assert_frame_equal(counts, got_counts)
        self.assertEqual(str(phylo), str(got_phylo))

    def test_match_tips_scrambled_tips(self):
        # Tips 'a' and 'b' are swapped in the tree relative to the table;
        # the expected table has its columns in the tree's tip order.
        counts = self._table([[0, 0, 1, 1],
                              [2, 3, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['a', 'b', 'c', 'd'])
        phylo = TreeNode.read([u"(((b,a)f, c),d)r;"])
        want_counts = self._table([[0, 0, 1, 1],
                                   [3, 2, 4, 4],
                                   [5, 5, 3, 3],
                                   [0, 0, 0, 1]],
                                  columns=['b', 'a', 'c', 'd'])

        got_counts, got_phylo = match_tips(counts, phylo)

        pdt.assert_frame_equal(want_counts, got_counts)
        self.assertEqual(str(phylo), str(got_phylo))

    def test_match_tips_scrambled_columns(self):
        # Columns 'a' and 'b' are swapped in the table relative to the tree;
        # the expected table has its columns in the tree's tip order.
        counts = self._table([[0, 0, 1, 1],
                              [3, 2, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['b', 'a', 'c', 'd'])
        phylo = TreeNode.read([u"(((a,b)f, c),d)r;"])
        want_counts = self._table([[0, 0, 1, 1],
                                   [2, 3, 4, 4],
                                   [5, 5, 3, 3],
                                   [0, 0, 0, 1]],
                                  columns=['a', 'b', 'c', 'd'])

        got_counts, got_phylo = match_tips(counts, phylo)

        pdt.assert_frame_equal(want_counts, got_counts)
        self.assertEqual(str(phylo), str(got_phylo))

    def test_match_tips_intersect_tips(self):
        # The tree has fewer tips than the table has columns.
        counts = self._table([[0, 0, 1, 1],
                              [2, 3, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['a', 'b', 'c', 'd'])
        phylo = TreeNode.read([u"((a,b)f,d)r;"])
        want_counts = self._table([[0, 0, 1],
                                   [2, 3, 4],
                                   [5, 5, 3],
                                   [0, 0, 1]],
                                  columns=['a', 'b', 'd'])

        got_counts, got_phylo = match_tips(counts, phylo, intersect=True)

        pdt.assert_frame_equal(want_counts, got_counts)
        self.assertEqual(str(phylo), str(got_phylo))

    def test_match_tips_intersect_columns(self):
        # The table has fewer columns than the tree has tips.
        counts = self._table([[0, 0, 1],
                              [2, 3, 4],
                              [5, 5, 3],
                              [0, 0, 1]],
                             columns=['a', 'b', 'd'])
        phylo = TreeNode.read([u"(((a,b)f, c),d)r;"])
        want_counts = self._table([[1, 0, 0],
                                   [4, 2, 3],
                                   [3, 5, 5],
                                   [1, 0, 0]],
                                  columns=['d', 'a', 'b'])
        want_phylo = TreeNode.read([u"(d,(a,b)f)r;"])

        got_counts, got_phylo = match_tips(counts, phylo, intersect=True)

        pdt.assert_frame_equal(want_counts, got_counts)
        self.assertEqual(str(want_phylo), str(got_phylo))

    def test_match_tips_intersect_tree_immutable(self):
        # The tree passed to match_tips() must not be modified.
        counts = self._table([[0, 0, 1],
                              [2, 3, 4],
                              [5, 5, 3],
                              [0, 0, 1]],
                             columns=['a', 'b', 'd'])
        phylo = TreeNode.read([u"(((a,b)f, c),d)r;"])

        match_tips(counts, phylo, intersect=True)

        self.assertEqual(str(phylo), u"(((a,b)f,c),d)r;\n")

    def test_match_tips_mismatch(self):
        # Case 1: the table has fewer columns than the tree has tips.
        counts = self._table([[0, 0, 1],
                              [2, 3, 4],
                              [5, 5, 3],
                              [0, 0, 1]],
                             columns=['a', 'b', 'd'])
        phylo = TreeNode.read([u"(((a,b)f, c),d)r;"])
        with self.assertRaises(ValueError):
            match_tips(counts, phylo)

        # Case 2: the tree has fewer tips than the table has columns.
        counts = self._table([[0, 0, 1, 1],
                              [2, 3, 4, 4],
                              [5, 5, 3, 3],
                              [0, 0, 0, 1]],
                             columns=['a', 'b', 'c', 'd'])
        phylo = TreeNode.read([u"((a,b)f,d)r;"])
        with self.assertRaises(ValueError):
            match_tips(counts, phylo)

    def test_rename_internal_nodes(self):
        # With no names supplied, y0, y1, y2 are the expected labels.
        phylo = TreeNode.read([u"(((a,b), c),d)r;"])
        want_phylo = TreeNode.read([u"(((a,b)y2, c)y1,d)y0;"])

        got_phylo = rename_internal_nodes(phylo)

        self.assertEqual(str(want_phylo), str(got_phylo))

    def test_rename_internal_nodes_names(self):
        # Explicit names are expected on the internal nodes, root first.
        phylo = TreeNode.read([u"(((a,b), c),d)r;"])
        want_phylo = TreeNode.read([u"(((a,b)ab, c)abc,d)r;"])

        got_phylo = rename_internal_nodes(phylo, ['r', 'abc', 'ab'])

        self.assertEqual(str(want_phylo), str(got_phylo))

    def test_rename_internal_nodes_names_mismatch(self):
        # Two names for three internal nodes is expected to raise.
        phylo = TreeNode.read([u"(((a,b), c),d)r;"])

        with self.assertRaises(ValueError):
            rename_internal_nodes(phylo, ['r', 'abc'])

    def test_rename_internal_nodes_warning(self):
        # A tree that already carries an internal label ('y2') is expected
        # to trigger a warning when renamed.
        phylo = TreeNode.read([u"(((a,b)y2, c),d)r;"])

        with self.assertWarns(Warning):
            rename_internal_nodes(phylo)

    def test_rename_internal_nodes_immutable(self):
        # The tree passed to rename_internal_nodes() must not be modified.
        phylo = TreeNode.read([u"(((a,b)y2, c),d)r;"])

        rename_internal_nodes(phylo)

        self.assertEqual(str(phylo), "(((a,b)y2,c),d)r;\n")


# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Loading