Skip to content

Commit

Permalink
Merge pull request galaxyproject#18928 from mvdbeek/add_filter_null_collection_operation_tool
Browse files Browse the repository at this point in the history

Add filter null collection operation tool
  • Loading branch information
jmchilton authored Oct 4, 2024
2 parents d18bc68 + 1c2bfb9 commit e3f4068
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 9 deletions.
1 change: 1 addition & 0 deletions lib/galaxy/config/sample/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<tool file="${model_tools_path}/zip_collection.xml" />
<tool file="${model_tools_path}/filter_failed_collection.xml" />
<tool file="${model_tools_path}/filter_empty_collection.xml" />
<tool file="${model_tools_path}/filter_null.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/merge_collection.xml" />
<tool file="${model_tools_path}/relabel_from_file.xml" />
Expand Down
4 changes: 3 additions & 1 deletion lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4723,8 +4723,10 @@ def set_skipped(self, object_store_populator: "ObjectStorePopulator") -> None:
self.state = self.states.OK
self.blurb = "skipped"
self.visible = False
null = json.dumps(None)
with open(self.dataset.get_file_name(), "w") as out:
out.write(json.dumps(None))
out.write(null)
self.peek = null
self.set_total_size()

def get_file_name(self, sync_cache: bool = True) -> str:
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/tool_util/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ class TestCollectionOutputAssertions(StrictModel):
class_: Optional[Literal["Collection"]] = Field("Collection", alias="class")
elements: Optional[Dict[str, TestCollectionElementAssertion]] = None
element_tests: Optional[Dict[str, "TestCollectionElementAssertion"]] = None
element_count: Optional[int] = None
attributes: Optional[CollectionAttributes] = None
collection_type: CollectionType = None

Expand Down
12 changes: 8 additions & 4 deletions lib/galaxy/tool_util/parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,10 +861,13 @@ def matches(ie_list: List, rel_path: str):
class TestCollectionOutputDef:
__test__ = False # Prevent pytest from discovering this class (issue #12071)

def __init__(self, name, attrib, element_tests):
def __init__(self, name, attrib, element_tests, element_count: Optional[int] = None):
self.name = name
self.collection_type = attrib.get("type", None)
count = attrib.get("count", None)
if element_count is not None:
count = element_count
else:
count = attrib.get("count")
self.count = int(count) if count is not None else None
self.attrib = attrib
self.element_tests = element_tests
Expand All @@ -874,7 +877,8 @@ def from_dict(as_dict):
return TestCollectionOutputDef(
name=as_dict["name"],
attrib=as_dict.get("attributes", {}),
element_tests=as_dict["element_tests"],
element_tests=as_dict.get("element_tests"),
element_count=as_dict.get("element_count"),
)

@staticmethod
Expand All @@ -891,7 +895,7 @@ def from_yaml_test_format(as_dict):
return TestCollectionOutputDef.from_dict(as_dict)

def to_dict(self):
return dict(name=self.name, attributes=self.attrib, element_tests=self.element_tests)
return dict(name=self.name, attributes=self.attrib, element_tests=self.element_tests, element_count=self.count)


class DrillDownOptionsDict(TypedDict):
Expand Down
5 changes: 3 additions & 2 deletions lib/galaxy/tool_util/verify/interactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ def verify_collection(output_collection_def, data_collection, verify_dataset):
raise AssertionError(message)

expected_element_count = output_collection_def.count
if expected_element_count:
if expected_element_count is not None:
actual_element_count = len(data_collection["elements"])
if expected_element_count != actual_element_count:
message = f"Output collection '{name}': expected to have {expected_element_count} elements, but it had {actual_element_count}."
Expand Down Expand Up @@ -1185,7 +1185,8 @@ def verify_elements(element_objects, element_tests):
message = f"Output collection '{name}': identifier '{identifier}' found out of order, expected order of {expected_sort_order} for the tool generated collection elements {eo_ids}"
raise AssertionError(message)

verify_elements(data_collection["elements"], output_collection_def.element_tests)
if output_collection_def.element_tests:
verify_elements(data_collection["elements"], output_collection_def.element_tests)


def _verify_composite_datatype_file_content(
Expand Down
10 changes: 9 additions & 1 deletion lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3798,7 +3798,15 @@ class FilterNullTool(FilterDatasetsTool):
def element_is_valid(element: model.DatasetCollectionElement):
element_object = element.element_object
assert isinstance(element_object, model.DatasetInstance)
return element_object.extension == "expression.json" and element_object.blurb == "skipped"
if element_object.extension == "expression.json":
if element_object.peek == "null":
# shortcut
return False
else:
with open(element_object.get_file_name()) as fh:
if fh.read(5) == "null":
return False
return True


class FlattenTool(DatabaseOperationTool):
Expand Down
46 changes: 46 additions & 0 deletions lib/galaxy/tools/filter_null.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<tool id="__FILTER_NULL__" name="Filter null elements" version="1.0.0" tool_type="filter_null">
    <description/>
    <!-- The filtering logic lives in the Python class referenced here; this file declares no command. -->
    <type class="FilterNullTool" module="galaxy.tools"/>
    <action module="galaxy.tools.actions.model_operations" class="ModelOperationToolAction"/>
    <edam_operations>
        <edam_operation>operation_3695</edam_operation>
    </edam_operations>
    <inputs>
        <param type="data_collection" collection_type="list,list:paired" name="input" label="Input Collection"/>
    </inputs>
    <outputs>
        <!-- Output format and collection type are both taken from the input collection. -->
        <collection name="output" format_source="input" type_source="input" label="${on_string} (without null datasets)">
        </collection>
    </outputs>
    <tests>
        <!-- A collection without null elements must come out with the same single element. -->
        <test>
            <param name="input">
                <collection type="list">
                    <element name="e1" value="simple_line.txt"/>
                </collection>
            </param>
            <output_collection name="output" type="list" count="1">
                <element name="e1">
                    <assert_contents>
                        <!-- The dot is escaped so it matches a literal full stop only. -->
                        <has_text_matching expression="^This is a line of text\.\n$"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

========
Synopsis
========

Removes null elements from a collection.

This tool takes a dataset collection and filters out nulls. This is useful for removing elements that resulted from conditional execution of jobs.

.. class:: infomark

This tool will create new history datasets from your collection but your quota usage will not increase.

    ]]></help>
</tool>
32 changes: 32 additions & 0 deletions lib/galaxy_test/workflow/filter_null.gxwf-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Case 1: `when` is true, so the single element is expected to survive the filter.
- doc: |
    Test to verify filter null tool keeps non-null datasets.
  job:
    input_collection:
      collection_type: list
      elements:
        - identifier: first
          content: "abc"
    when:
      value: true
      type: raw
  outputs:
    out:
      class: Collection
      collection_type: list
      element_count: 1
# Case 2: `when` is false, so the output collection is expected to be empty.
- doc: |
    Test to verify filter null tool discards null datasets.
  job:
    input_collection:
      collection_type: list
      elements:
        - identifier: first
          content: "abc"
    when:
      value: false
      type: raw
  outputs:
    out:
      class: Collection
      collection_type: list
      element_count: 0
22 changes: 22 additions & 0 deletions lib/galaxy_test/workflow/filter_null.gxwf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class: GalaxyWorkflow
inputs:
  input_collection:
    type: data_collection
  # Boolean that gates execution of the `cat` step below.
  when:
    type: boolean
outputs:
  out:
    outputSource: filter_null/output
steps:
  cat:
    tool_id: cat
    in:
      input1:
        source: input_collection
      when:
        source: when
    # Conditional execution: the step is governed by the `when` input.
    when: $(inputs.when)
  filter_null:
    tool_id: '__FILTER_NULL__'
    in:
      # Filters the (possibly conditionally skipped) output of `cat`.
      input: cat/out_file1
4 changes: 3 additions & 1 deletion lib/galaxy_test/workflow/test_framework_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def _verify(self, run_summary: RunJobsSummary, output_definitions: OutputsDict):
self._verify_output(run_summary, output_name, output_definition)

def _verify_output(self, run_summary: RunJobsSummary, output_name, test_properties: OutputChecks):
is_collection_test = isinstance(test_properties, dict) and "elements" in test_properties
is_collection_test = isinstance(test_properties, dict) and (
"elements" in test_properties or test_properties.get("class") == "Collection"
)
item_label = f"Output named {output_name}"

def get_filename(name):
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@
<tool file="${model_tools_path}/filter_failed_collection.xml" />
<tool file="${model_tools_path}/keep_success_collection.xml" />
<tool file="${model_tools_path}/filter_empty_collection.xml" />
<tool file="${model_tools_path}/filter_null.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/harmonize_two_collections_list.xml" />
Expand Down

0 comments on commit e3f4068

Please sign in to comment.