def strip_properties(expression):
    """Return ``expression`` with its ``%`` predicates removed.

    The expression is parsed with ``ast.parse`` and walked by a
    ``PredicateRemover``; the text that visitor rebuilds is returned.

    If anything at all goes wrong while parsing or walking, the original
    ``expression`` is handed back untouched.  That is deliberate: the
    failure is deferred to q2galaxy so that any error surfaces there
    rather than here.
    """
    try:
        tree = ast.parse(expression)
        remover = PredicateRemover()
        remover.visit(tree)
    except Exception:
        # Best effort only -- fall back to the full, unstripped expression.
        return expression
    else:
        return remover.expression
class PredicateRemover(ast.NodeVisitor):
    """Rebuild a semantic type expression without its ``%`` predicates.

    Visit a tree produced by ``ast.parse`` and read the result from
    ``self.expression``.  For every ``left % right`` node only ``left`` is
    emitted; every other binary operator is written back out using the
    spelling in ``binops``.  Subscripts are re-bracketed and tuple elements
    are joined with ``', '``.

    Parentheses are not reproduced, and literal constants contribute no
    text of their own.

    NOTE: Python 3.9 has a built in ``ast.unparse`` which can probably
    replace the string building here once 3.9 is the minimum version:
    https://docs.python.org/3.9/library/ast.html#ast.unparse
    """

    # Source spelling (with surrounding spaces) of each binary operator that
    # is kept.  ``ast.Mod`` is intentionally absent: it introduces the
    # predicate that gets dropped.
    binops = {
        ast.Add: ' + ',
        ast.Sub: ' - ',
        ast.Mult: ' * ',
        ast.Div: ' / ',
        ast.FloorDiv: ' // ',
        ast.Pow: ' ** ',
        ast.LShift: ' << ',
        ast.RShift: ' >> ',
        ast.BitOr: ' | ',
        ast.BitXor: ' ^ ',
        ast.BitAnd: ' & ',
        ast.MatMult: ' @ '
    }

    def __init__(self):
        # Text of the expression rebuilt so far.
        self.expression = ''

        super().__init__()

    def visit_Name(self, node):
        """Append the bare identifier."""
        self.expression += node.id

    def visit_Subscript(self, node):
        """Emit ``value[slice]``.

        The brackets are written here rather than in ``visit_Index``
        because the parser stopped producing ``ast.Index`` nodes in
        Python 3.9; handling the subscript itself works on every version.
        """
        self.visit(node.value)
        self.expression += '['
        self.visit(node.slice)
        self.expression += ']'

    def visit_Index(self, node):
        """Unwrap the ``Index`` node that Python < 3.9 puts around a slice."""
        self.visit(node.value)

    def visit_Tuple(self, node):
        """Emit the elements separated by ``', '``.

        The separator is written between elements explicitly, so nested
        tuples and operator expressions inside an element cannot disturb
        it.
        """
        for position, element in enumerate(node.elts):
            if position:
                self.expression += ', '
            self.visit(element)

    def visit_BinOp(self, node):
        """Emit ``left``; for any operator but ``%`` also emit ``op right``."""
        self.visit(node.left)
        if isinstance(node.op, ast.Mod):
            # Everything to the right of ``%`` is the predicate: drop it.
            return
        self.expression += self.binops[node.op.__class__]
        self.visit(node.right)
class TestStripProperties(unittest.TestCase):
    """Check that ``strip_properties`` drops ``%`` predicates only."""

    def _assert_stripped(self, expression, expected):
        # Shared body of the single-expression tests below.
        self.assertEqual(strip_properties(expression), expected)

    def test_simple(self):
        self._assert_stripped('Taxonomy % Properties("SILVIA")', 'Taxonomy')

    def test_single(self):
        self._assert_stripped(
            'FeatureData[Taxonomy % Properties("SILVIA")]',
            'FeatureData[Taxonomy]',
        )

    def test_double(self):
        self._assert_stripped(
            'FeatureData[Taxonomy % Properties("SILVIA"), '
            'DistanceMatrix % Axes("ASV", "ASV")]',
            'FeatureData[Taxonomy, DistanceMatrix]',
        )

    def test_nested(self):
        self._assert_stripped(
            'Tuple[FeatureData[Taxonomy % '
            'Properties("SILVIA")] % Axes("ASV", "ASV")]',
            'Tuple[FeatureData[Taxonomy]]',
        )

    def test_complex(self):
        self._assert_stripped(
            'Tuple[FeatureData[Taxonomy % Properties("SILVA")] % Axis("ASV")'
            ', DistanceMatrix % Axes("ASV", "ASV")] % Unique',
            'Tuple[FeatureData[Taxonomy], DistanceMatrix]',
        )

    def test_keep_different_binop(self):
        # Only ``%`` is stripped; any other operator must survive.
        self._assert_stripped(
            'FeatureData[Taxonomy % Properties("SILVIA"), '
            'Taxonomy & Properties]',
            'FeatureData[Taxonomy, Taxonomy & Properties]',
        )

    def test_multiple_strings(self):
        # Two consecutive calls must not leak state into one another, so
        # both results are computed before either is checked.
        inputs = (
            'Taxonomy % Properties("SILVIA")',
            'FeatureData[Taxonomy % Properties("SILVIA")]',
        )
        wanted = ('Taxonomy', 'FeatureData[Taxonomy]')

        results = [strip_properties(text) for text in inputs]

        self.assertEqual(results[0], wanted[0])
        self.assertEqual(results[1], wanted[1])