Skip to content

Commit

Permalink
QIIME 2: Strip properties from semantic types (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
Oddant1 authored and ebolyen committed Jul 5, 2022
1 parent 2db3d50 commit 38b1bfc
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 2 deletions.
80 changes: 78 additions & 2 deletions lib/galaxy/datatypes/qiime2.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,27 @@
import io
import zipfile
import ast
import uuid as _uuid
import zipfile

import yaml

from galaxy.datatypes.binary import CompressedZipArchive
from galaxy.datatypes.metadata import MetadataElement


def strip_properties(expression):
    """Return ``expression`` with its ``%`` predicates removed.

    The expression is parsed as Python source and rebuilt by
    ``PredicateRemover``.

    This is deliberately best-effort: if anything goes wrong while parsing
    or rebuilding, the original ``expression`` is returned unchanged. That
    punts the error off to q2galaxy, so that if we error we do so there and
    not here.
    """
    try:
        parsed = ast.parse(expression)
        remover = PredicateRemover()
        remover.visit(parsed)
    except Exception:
        # Fall back to the full, unstripped expression on any problem.
        return expression

    return remover.expression


class QIIME2Result(CompressedZipArchive):
MetadataElement(name="semantic_type", readonly=True)
MetadataElement(name="semantic_type_simple", readonly=True, visible=False)
Expand All @@ -21,7 +35,8 @@ def set_meta(self, dataset, overwrite=True, **kwd):
if value:
setattr(dataset.metadata, key, value)

dataset.metadata.semantic_type_simple = 'TODO'
dataset.metadata.semantic_type_simple = \
strip_properties(dataset.metadata.semantic_type)

def set_peek(self, dataset, is_multi_byte=False):
if dataset.metadata.semantic_type == 'Visualization':
Expand Down Expand Up @@ -76,6 +91,67 @@ def sniff(self, filename):
return metadata and metadata['semantic_type'] == 'Visualization'


# Python 3.9 has a built-in ast.unparse. We can probably use it in the future
# when we are using 3.9:
# https://docs.python.org/3.9/library/ast.html#ast.unparse
class PredicateRemover(ast.NodeVisitor):
    """Rebuild a type expression as text with its ``%`` predicates removed.

    Visit a tree produced by ``ast.parse`` and then read ``expression``.
    Names, subscripts (``A[B]``), tuples (``A, B``) and every binary
    operator listed in ``binops`` are written back out. For a ``%``
    operation only the left operand is kept, which is what drops the
    predicate.

    Example: ``FeatureData[Taxonomy % Properties("x")]`` is rebuilt as
    ``FeatureData[Taxonomy]``.
    """

    # Source text for every binary operator that is preserved. ``ast.Mod`` is
    # deliberately absent: the right-hand side of ``%`` is discarded.
    binops = {
        ast.Add: ' + ',
        ast.Sub: ' - ',
        ast.Mult: ' * ',
        ast.Div: ' / ',
        ast.FloorDiv: ' // ',
        ast.Pow: ' ** ',
        ast.LShift: ' << ',
        ast.RShift: ' >> ',
        ast.BitOr: ' | ',
        ast.BitXor: ' ^ ',
        ast.BitAnd: ' & ',
        ast.MatMult: ' @ ',
    }

    def __init__(self):
        # The rebuilt expression, appended to as nodes are visited.
        self.expression = ''
        # Not used by any visitor method; kept only so that code inspecting
        # these attributes continues to work.
        self.tuple_count = 0
        self.in_index = False

        super().__init__()

    def visit_Name(self, node):
        """Append a bare identifier."""
        self.expression += node.id

    def visit_Subscript(self, node):
        """Append ``value[slice]``.

        The brackets are written here rather than in ``visit_Index`` because
        the parser no longer produces ``ast.Index`` nodes on Python 3.9+.
        """
        self.visit(node.value)
        self.expression += '['
        self.visit(node.slice)
        self.expression += ']'

    def visit_Index(self, node):
        """Unwrap the ``ast.Index`` node emitted by parsers before 3.9."""
        self.visit(node.value)

    def visit_Tuple(self, node):
        """Append the tuple elements separated by ``', '``."""
        for position, element in enumerate(node.elts):
            if position:
                self.expression += ', '
            self.visit(element)

    def visit_BinOp(self, node):
        """Append ``left op right``, or only ``left`` when ``op`` is ``%``."""
        self.visit(node.left)
        if not isinstance(node.op, ast.Mod):
            self.expression += self.binops[type(node.op)]
            self.visit(node.right)


def _get_metadata_from_archive(archive):
uuid = _get_uuid(archive)
archive_version, framework_version = _get_versions(archive, uuid)
Expand Down
74 changes: 74 additions & 0 deletions test/unit/data/datatypes/test_qiime2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import unittest

from galaxy.datatypes.qiime2 import strip_properties


# Note: Not all of the expressions here are completely valid types; they are
# just representative examples.
class TestStripProperties(unittest.TestCase):
    """Check that ``strip_properties`` drops ``%`` predicates only."""

    def _assert_stripped(self, expression, expected):
        """Strip ``expression`` and compare the result with ``expected``."""
        self.assertEqual(strip_properties(expression), expected)

    def test_simple(self):
        # A bare type with a single predicate.
        self._assert_stripped('Taxonomy % Properties("SILVIA")', 'Taxonomy')

    def test_single(self):
        # A predicate on the only field of a subscripted type.
        self._assert_stripped(
            'FeatureData[Taxonomy % Properties("SILVIA")]',
            'FeatureData[Taxonomy]',
        )

    def test_double(self):
        # Two fields, each carrying its own predicate.
        self._assert_stripped(
            'FeatureData[Taxonomy % Properties("SILVIA"), '
            'DistanceMatrix % Axes("ASV", "ASV")]',
            'FeatureData[Taxonomy, DistanceMatrix]',
        )

    def test_nested(self):
        # Predicates at two nesting depths.
        self._assert_stripped(
            'Tuple[FeatureData[Taxonomy % '
            'Properties("SILVIA")] % Axes("ASV", "ASV")]',
            'Tuple[FeatureData[Taxonomy]]',
        )

    def test_complex(self):
        # Nested fields, several fields and an outermost predicate at once.
        self._assert_stripped(
            'Tuple[FeatureData[Taxonomy % Properties("SILVA")] % Axis("ASV")'
            ', DistanceMatrix % Axes("ASV", "ASV")] % Unique',
            'Tuple[FeatureData[Taxonomy], DistanceMatrix]',
        )

    def test_keep_different_binop(self):
        # Binary operators other than % must survive untouched.
        self._assert_stripped(
            'FeatureData[Taxonomy % Properties("SILVIA"), '
            'Taxonomy & Properties]',
            'FeatureData[Taxonomy, Taxonomy & Properties]',
        )

    def test_multiple_strings(self):
        # Strip every expression first and only then compare, so that
        # consecutive calls are shown not to interfere with each other.
        cases = (
            ('Taxonomy % Properties("SILVIA")', 'Taxonomy'),
            ('FeatureData[Taxonomy % Properties("SILVIA")]',
             'FeatureData[Taxonomy]'),
        )
        observed = [strip_properties(source) for source, _ in cases]

        for actual, (_, expected) in zip(observed, cases):
            self.assertEqual(actual, expected)

0 comments on commit 38b1bfc

Please sign in to comment.