Skip to content

Commit

Permalink
Remove TreeNode from the main workflow (#360)
Browse files Browse the repository at this point in the history
* ENH: First pass of something that roughly works

* TST: Add tests to the new version of validate_tree

* ENH: Fix all tests

* TST: Fix JavaScript tests

* STY: Fix stylistic issues

Fixes #155

* BUG: Fix issue with root node

* ENH: Remove extra null check

Thanks @kwcantrell
  • Loading branch information
ElDeveloper authored Aug 31, 2020
1 parent 0549874 commit 6ade1a9
Show file tree
Hide file tree
Showing 12 changed files with 157 additions and 268 deletions.
4 changes: 2 additions & 2 deletions empress/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from empress.tree import Tree
from empress.core import Empress

__all__ = ['Tree']
__all__ = ['Empress']
36 changes: 17 additions & 19 deletions empress/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from empress.tree import Tree
from empress.tree import validate_tree
from empress.tools import (
fill_missing_node_names, match_inputs, shifting,
filter_feature_metadata_to_tree
match_inputs, shifting, filter_feature_metadata_to_tree
)
from empress.compression_utils import (
remove_empty_samples_and_features, compress_table,
Expand All @@ -22,7 +21,6 @@

from shutil import copytree
from emperor import Emperor
from bp import to_skbio_treenode
from jinja2 import Environment, FileSystemLoader

SUPPORT_FILES = pkg_resources.resource_filename('empress', 'support_files')
Expand Down Expand Up @@ -186,11 +184,7 @@ def _validate_and_match_data(self, ignore_missing_samples,
self.tip_md, self.int_md, self.tree
)

# extract balance parenthesis
self._bp_tree = list(self.tree.B)

self.tree = Tree.from_tree(to_skbio_treenode(self.tree))
fill_missing_node_names(self.tree)
validate_tree(self.tree)

def copy_support_files(self, target=None):
"""Copies the support files to a target directory
Expand Down Expand Up @@ -279,26 +273,30 @@ def _to_dict(self):
# bptree indices start at one, hence we pad the arrays
names = [-1]
lengths = [-1]
for i, node in enumerate(self.tree.postorder(include_self=True), 1):
names.append(node.name)
lengths.append(node.length)
if node.name in fid2idxs_t:
fid2idxs[i] = fid2idxs_t[node.name]
for i in range(1, len(self.tree) + 1):
node = self.tree.postorderselect(i)
name = self.tree.name(node)

names.append(name)
lengths.append(self.tree.length(node))

if name in fid2idxs_t:
fid2idxs[i] = fid2idxs_t[name]
f_ids[fid2idxs[i]] = i

if node.name in compressed_tm_tmp:
compressed_tm[i] = compressed_tm_tmp[node.name]
if name in compressed_tm_tmp:
compressed_tm[i] = compressed_tm_tmp[name]

# Note: for internal metadata, node names may not be unique. Thus,
# we duplicate the internal node metadata for each node in the
# metadata with the same name.
if node.name in compressed_im_tmp:
compressed_im[i] = compressed_im_tmp[node.name]
if name in compressed_im_tmp:
compressed_im[i] = compressed_im_tmp[name]

data_to_render = {
'base_url': self.base_url,
# tree info
'tree': shifting(self._bp_tree),
'tree': shifting(self.tree.B),
'lengths': lengths,
'names': names,
# feature table
Expand Down
3 changes: 1 addition & 2 deletions empress/support_files/js/drawer.js
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,7 @@ define(["glMatrix", "Camera"], function (gl, Camera) {
/**
* Display the tree nodes.
* Note: Currently Empress will only display the nodes that had an assigned
* name in the newick string. (I.E. Empress will not show any node that
* starts with EmpressNode)
* name in the newick string.
*
* Note: this will only take effect after draw() is called.
*
Expand Down
7 changes: 3 additions & 4 deletions empress/support_files/js/empress.js
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ define([
this._drawer.initialize();
this._events.setMouseEvents();
var nodeNames = this._tree.getAllNames();
nodeNames = nodeNames.filter((n) => !n.startsWith("EmpressNode"));
nodeNames = nodeNames.filter((n) => n !== null);
nodeNames.sort();
this._events.autocomplete(nodeNames);

Expand Down Expand Up @@ -694,7 +694,7 @@ define([
if (!this.getNodeInfo(node, "visible")) {
continue;
}
if (!this.getNodeInfo(node, "name").startsWith("EmpressNode")) {
if (this.getNodeInfo(node, "name") !== null) {
coords.push(
this.getX(node),
this.getY(node),
Expand Down Expand Up @@ -1941,8 +1941,7 @@ define([
/**
* Display the tree nodes.
* Note: Currently Empress will only display the nodes that had an assigned
* name in the newick string. (I.E. Empress will not show any node that
* starts with EmpressNode)
* name in the newick string.
*
* @param{Boolean} showTreeNodes If true then empress will display the tree
* nodes.
Expand Down
22 changes: 0 additions & 22 deletions empress/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,6 @@ class DataMatchingWarning(Warning):
pass


def fill_missing_node_names(tree):
""" Names nodes in the tree without a name.
Parameters
----------
tree : skbio.TreeNode or empress.Tree
Input tree with potentially unnamed nodes (i.e. nodes' .name attributes
can be None).
Returns
-------
skbio.TreeNode or empress.Tree
Tree with all nodes assigned a name.
"""
current_unlabeled_node = 0
for n in tree.postorder(include_self=True):
if n.name is None:
new_name = 'EmpressNode{}'.format(current_unlabeled_node)
n.name = new_name
current_unlabeled_node += 1


def read(file_name, file_format='newick'):
""" Reads in contents from a file.
"""
Expand Down
177 changes: 64 additions & 113 deletions empress/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,130 +7,81 @@
# ----------------------------------------------------------------------------

import warnings
from skbio import TreeNode


class TreeFormatWarning(Warning):
pass


class Tree(TreeNode):
"""
Attributes
def validate_tree(tree):
"""Check for the validty of the tree
Parameters
----------
length
leafcount
height
depth
Notes
-----
`length` refers to the branch length of a node to its parent.
`leafcount` is the number of tips within a subtree. `height` refers
to the longest path from root to the deepst leaf in that subtree.
`depth` is the number of nodes found in the longest path.
tree : bp.BPTree
The tree to validate
"""

def __init__(self, use_lengths=False, **kwargs):
""" Constructs a Dendrogram object for visualization.
Parameters
----------
use_lengths: bool
Specifies if the branch lengths should be included in the
resulting visualization (default True).
Returns
-------
"""
super().__init__(**kwargs)
self.childRem = -1

@classmethod
def from_tree(cls, tree, use_lengths=True):
""" Creates an Tree object from a skbio tree.
Parameters
----------
tree : skbio.TreeNode
Input skbio tree
use_lengths: Boolean
Specify if the branch length should be incorporated into
the geometry calculations for visualization.
Returns
-------
Tree
"""
if tree.count() <= 1:
raise ValueError("Tree must contain at least 2 nodes.")

# While traversing the tree, record tip / internal node names
# (Nodes without names are ignored, since we'll assign those later
# using tools.fill_missing_node_names())
tip_names = []
internal_node_names = []
max_branch_length = 0
for n in tree.postorder(include_self=False):
if n.name is not None:
# NOTE: This should eventually be taken out when
# fill_missing_node_names() is refactored. However, for now,
# this makes sure that users can't accidentally break things by
# naming nodes identical to our default names for missing nodes
if n.name.startswith("EmpressNode"):
raise ValueError(
'Node names can\'t start with "EmpressNode".'
)
if n.is_tip():
tip_names.append(n.name)
else:
internal_node_names.append(n.name)
if n.length is None:
raise ValueError(
"Non-root branches of the tree must have lengths."
)

if n.length < 0:
raise ValueError(
"Non-root branches of the tree must have nonnegative "
"lengths."
)
max_branch_length = max(n.length, max_branch_length)

# We didn't consider the root node in the above traversal since we
# don't care about its length. However, we do care about its name,
# so we add the root's name to internal_node_names.
internal_node_names.append(tree.name)

if max_branch_length == 0:
raise ValueError(
"At least one non-root branch of the tree must have a "
"positive length."
)

unique_tip_name_set = set(tip_names)
if len(unique_tip_name_set) != len(tip_names):
raise ValueError("Tip names in the tree must be unique.")

unique_internal_node_name_set = set(internal_node_names)
if len(unique_tip_name_set & unique_internal_node_name_set) > 0:
# this is currently untested since we can't actually parse a tree of this
# nature: https://github.com/wasade/improved-octo-waddle/issues/29
if len(tree) <= 1:
raise ValueError("Tree must contain at least 2 nodes.")

# While traversing the tree, record tip / internal node names
# (Nodes without names are ignored, since we'll assign those later
# using tools.fill_missing_node_names())
tip_names = []
internal_node_names = []
max_branch_length = 0

# do not include the root in these checks
for i in range(1, len(tree)):
node = tree.postorderselect(i)
name = tree.name(node)
length = tree.length(node)

if name is not None:
if isleaf(tree, node):
tip_names.append(name)
else:
internal_node_names.append(name)

if length < 0:
raise ValueError(
"Tip names in the tree cannot overlap with internal node "
"names."
"Non-root branches of the tree must have nonnegative "
"lengths."
)

if len(unique_internal_node_name_set) != len(internal_node_names):
warnings.warn(
"Internal node names in the tree are not unique.",
TreeFormatWarning
)

for n in tree.postorder():
n.__class__ = Tree
n.tip_count = 0

return tree
max_branch_length = max(length, max_branch_length)

# We didn't consider the root node in the above traversal since we
# don't care about its length. However, we do care about its name,
# so we add the root's name to internal_node_names.
internal_node_names.append(tree.name(tree.postorderselect(i + 1)))

if max_branch_length == 0:
raise ValueError(
"At least one non-root branch of the tree must have a "
"positive length."
)

unique_tip_name_set = set(tip_names)
if len(unique_tip_name_set) != len(tip_names):
raise ValueError("Tip names in the tree must be unique.")

unique_internal_node_name_set = set(internal_node_names)
if len(unique_tip_name_set & unique_internal_node_name_set) > 0:
raise ValueError(
"Tip names in the tree cannot overlap with internal node "
"names."
)

if len(unique_internal_node_name_set) != len(internal_node_names):
warnings.warn(
"Internal node names in the tree are not unique.",
TreeFormatWarning
)

return


def bp_tree_tips(bp_tree):
Expand Down
Loading

0 comments on commit 6ade1a9

Please sign in to comment.