Skip to content

Commit

Permalink
[WIP] Implement records - heterogeneous dataset collections.
Browse files Browse the repository at this point in the history
Existing dataset collection types are meant to be homogeneous - all datasets of the same type. This introduces CWL-style record dataset collections.
  • Loading branch information
jmchilton committed Aug 4, 2017
1 parent d7ed115 commit 2841da4
Show file tree
Hide file tree
Showing 13 changed files with 209 additions and 30 deletions.
22 changes: 17 additions & 5 deletions lib/galaxy/dataset_collections/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,23 @@
from galaxy.util.odict import odict


def build_collection( type, dataset_instances ):
def build_collection( type, dataset_instances, fields=None ):
"""
Build DatasetCollection with populated DatasetcollectionElement objects
corresponding to the supplied dataset instances or throw exception if
this is not a valid collection of the specified type.
"""
dataset_collection = model.DatasetCollection( )
set_collection_elements( dataset_collection, type, dataset_instances )
dataset_collection = model.DatasetCollection( fields=fields )
set_collection_elements( dataset_collection, type, dataset_instances, fields=fields )
return dataset_collection


def set_collection_elements( dataset_collection, type, dataset_instances ):
def set_collection_elements( dataset_collection, type, dataset_instances, fields=None ):
element_index = 0
elements = []
for element in type.generate_elements( dataset_instances ):
if fields == "auto":
fields = guess_fields(dataset_instances)
for element in type.generate_elements( dataset_instances, fields=fields ):
element.element_index = element_index
element.collection = dataset_collection
elements.append( element )
Expand All @@ -27,6 +29,16 @@ def set_collection_elements( dataset_collection, type, dataset_instances ):
return dataset_collection


def guess_fields(dataset_instances):
    """Derive a record ``fields`` description from the supplied elements.

    One ``{"class": "File", "name": <identifier>}`` entry is produced per
    item of ``dataset_instances``, in the mapping's iteration order.

    :param dataset_instances: mapping of element identifier to element; each
        element must expose ``history_content_type == "dataset"``.
    :returns: list of field dictionaries, empty if the mapping is empty.
    :raises AssertionError: if any element is not a plain dataset.
    """
    fields = []
    for identifier, element in dataset_instances.items():
        # TODO: Make generic enough to handle nested record types.
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O.
        if element.history_content_type != "dataset":
            raise AssertionError(
                "Cannot guess record fields: element %r is not a dataset." % (identifier,)
            )
        fields.append({"class": "File", "name": identifier})

    return fields


class CollectionBuilder(object):
""" Purely functional builder pattern for building a dataset collection. """

Expand Down
13 changes: 9 additions & 4 deletions lib/galaxy/dataset_collections/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

from .types import (
list,
paired
paired,
record,
)

PLUGIN_CLASSES = [list.ListDatasetCollectionType, paired.PairedDatasetCollectionType]
PLUGIN_CLASSES = [
list.ListDatasetCollectionType,
paired.PairedDatasetCollectionType,
record.RecordDatasetCollectionType,
]


class DatasetCollectionTypesRegistry(object):
Expand All @@ -16,12 +21,12 @@ def __init__( self, app ):
def get( self, plugin_type ):
return self.__plugins[ plugin_type ]

def prototype( self, plugin_type ):
def prototype( self, plugin_type, fields=None ):
plugin_type_object = self.get( plugin_type )
if not hasattr( plugin_type_object, 'prototype_elements' ):
raise Exception( "Cannot pre-determine structure for collection of type %s" % plugin_type )

dataset_collection = model.DatasetCollection()
elements = [ e for e in plugin_type_object.prototype_elements() ]
elements = [ e for e in plugin_type_object.prototype_elements( fields=fields ) ]
dataset_collection.elements = elements
return dataset_collection
7 changes: 4 additions & 3 deletions lib/galaxy/dataset_collections/type_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ def __init__( self, type_registry ):
# I think.
self.type_registry = type_registry

def for_collection_type( self, collection_type ):
return CollectionTypeDescription( collection_type, self )
def for_collection_type( self, collection_type, fields=None ):
return CollectionTypeDescription( collection_type, self, fields=fields )


class CollectionTypeDescription( object ):
Expand Down Expand Up @@ -40,9 +40,10 @@ class CollectionTypeDescription( object ):
'list'
"""

def __init__( self, collection_type, collection_type_description_factory ):
def __init__( self, collection_type, collection_type_description_factory, fields=None ):
self.collection_type = collection_type
self.collection_type_description_factory = collection_type_description_factory
self.fields = fields
self.__has_subcollections = self.collection_type.find( ":" ) > 0

def effective_collection_type_description( self, subcollection_type ):
Expand Down
5 changes: 4 additions & 1 deletion lib/galaxy/dataset_collections/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
class DatasetCollectionType(object):

@abstractmethod
def generate_elements( self, dataset_instances ):
def generate_elements( self, dataset_instances, **kwds ):
""" Generate DatasetCollectionElements with corresponding
to the supplied dataset instances or throw exception if
this is not a valid collection of the specified type.
Expand All @@ -24,5 +24,8 @@ def generate_elements( self, dataset_instances ):

class BaseDatasetCollectionType( DatasetCollectionType ):

def __init__(self, **kwds):
pass

def _validation_failed( self, message ):
raise exceptions.ObjectAttributeInvalidException( message )
5 changes: 1 addition & 4 deletions lib/galaxy/dataset_collections/types/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ class ListDatasetCollectionType( BaseDatasetCollectionType ):
"""
collection_type = "list"

def __init__( self ):
pass

def generate_elements( self, elements ):
def generate_elements( self, elements, **kwds ):
for identifier, element in elements.items():
association = DatasetCollectionElement(
element=element,
Expand Down
7 changes: 2 additions & 5 deletions lib/galaxy/dataset_collections/types/paired.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@ class PairedDatasetCollectionType( BaseDatasetCollectionType ):
"""
collection_type = "paired"

def __init__( self ):
pass

def generate_elements( self, elements ):
def generate_elements( self, elements, **kwds ):
forward_dataset = elements.get( FORWARD_IDENTIFIER, None )
reverse_dataset = elements.get( REVERSE_IDENTIFIER, None )
if not forward_dataset or not reverse_dataset:
Expand All @@ -33,7 +30,7 @@ def generate_elements( self, elements ):
yield left_association
yield right_association

def prototype_elements( self ):
def prototype_elements( self, **kwds ):
left_association = DatasetCollectionElement(
element=HistoryDatasetAssociation(),
element_identifier=FORWARD_IDENTIFIER,
Expand Down
43 changes: 43 additions & 0 deletions lib/galaxy/dataset_collections/types/record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from galaxy.exceptions import RequestParameterMissingException
from galaxy.model import DatasetCollectionElement, HistoryDatasetAssociation

from ..types import BaseDatasetCollectionType


class RecordDatasetCollectionType(BaseDatasetCollectionType):
    """Arbitrary CWL-style record type.

    Unlike the other collection types, a record needs a ``fields``
    description: a sequence of dictionaries, each carrying at least a
    ``name`` key, that lists the expected elements in order.
    """

    collection_type = "record"

    def generate_elements(self, elements, fields=None, **kwds):
        """Yield one DatasetCollectionElement per supplied element.

        :param elements: mapping of element identifier to element; it must
            support ``len()`` and ``.items()``.
        :param fields: sequence of field dictionaries; the i-th field's
            ``name`` must equal the i-th identifier in ``elements``.
        :raises RequestParameterMissingException: if ``fields`` is None.

        A mismatch in length or in any name is reported through
        ``self._validation_failed``. Because this is a generator, nothing is
        validated until iteration starts.
        """
        if fields is None:
            raise RequestParameterMissingException("Missing or null parameter fields required for record types.")
        if len(elements) != len(fields):
            self._validation_failed("Supplied element do not match fields.")
        # Lengths are known to be equal here, so zip pairs every field with
        # exactly one element.
        for field, (identifier, element) in zip(fields, elements.items()):
            # .get() so a field lacking "name" is reported as a validation
            # failure instead of surfacing as a bare KeyError.
            if field.get("name") != identifier:
                self._validation_failed("Supplied element do not match fields.")

            # TODO: validate type and such.
            yield DatasetCollectionElement(
                element=element,
                element_identifier=identifier,
            )

    def prototype_elements(self, fields=None, **kwds):
        """Yield placeholder elements describing the record's structure.

        Each field produces a DatasetCollectionElement wrapping a fresh,
        empty HistoryDatasetAssociation and identified by the field's name.

        :param fields: sequence of field dictionaries, each with a non-empty
            ``name`` and, if present, a non-empty ``type``.
        :raises RequestParameterMissingException: if ``fields`` is None.

        A field with a missing/empty name or an empty type is reported
        through ``self._validation_failed``.
        """
        if fields is None:
            raise RequestParameterMissingException("Missing or null parameter fields required for record types.")
        for field in fields:
            name = field.get("name", None)
            # Explicit checks instead of ``assert`` so they survive python -O.
            if not name:
                self._validation_failed("Record field is missing a name.")
            # NOTE(review): only "type" is checked here; confirm whether
            # "class" (as used by auto-guessed fields) should be accepted too.
            if not field.get("type", "File"):
                self._validation_failed("Record field %r has an empty type." % (name,))
            yield DatasetCollectionElement(
                element=HistoryDatasetAssociation(),
                element_identifier=name,
            )
11 changes: 6 additions & 5 deletions lib/galaxy/managers/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__( self, app ):

def create( self, trans, parent, name, collection_type, element_identifiers=None,
elements=None, implicit_collection_info=None, trusted_identifiers=None,
hide_source_items=False, tags=None):
hide_source_items=False, tags=None, fields=None ):
"""
PRECONDITION: security checks on ability to add to parent
occurred during load.
Expand All @@ -62,6 +62,7 @@ def create( self, trans, parent, name, collection_type, element_identifiers=None
element_identifiers=element_identifiers,
elements=elements,
hide_source_items=hide_source_items,
fields=fields,
)

if isinstance( parent, model.History ):
Expand Down Expand Up @@ -112,12 +113,12 @@ def create( self, trans, parent, name, collection_type, element_identifiers=None
return self.__persist( dataset_collection_instance )

def create_dataset_collection( self, trans, collection_type, element_identifiers=None, elements=None,
hide_source_items=None ):
hide_source_items=None, fields=None ):
if element_identifiers is None and elements is None:
raise RequestParameterInvalidException( ERROR_INVALID_ELEMENTS_SPECIFICATION )
if not collection_type:
raise RequestParameterInvalidException( ERROR_NO_COLLECTION_TYPE )
collection_type_description = self.collection_type_descriptions.for_collection_type( collection_type )
collection_type_description = self.collection_type_descriptions.for_collection_type( collection_type, fields=fields )
has_subcollections = collection_type_description.has_subcollections( )
# If we have elements, this is an internal request, don't need to load
# objects from identifiers.
Expand All @@ -141,12 +142,12 @@ def create_dataset_collection( self, trans, collection_type, element_identifiers

if elements is not self.ELEMENTS_UNINITIALIZED:
type_plugin = collection_type_description.rank_type_plugin()
dataset_collection = builder.build_collection( type_plugin, elements )
dataset_collection = builder.build_collection( type_plugin, elements, fields=fields )
if hide_source_items:
log.debug("Hiding source items during dataset collection creation")
for dataset in dataset_collection.dataset_instances:
dataset.visible = False
else:
# TODO: Pass fields here - need test case first.
dataset_collection = model.DatasetCollection( populated=False )
dataset_collection.collection_type = collection_type
return dataset_collection
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/managers/collections_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def api_payload_to_create_params( payload ):
collection_type=payload.get( "collection_type" ),
element_identifiers=payload.get( "element_identifiers" ),
name=payload.get( "name", None ),
hide_source_items=string_as_bool( payload.get( "hide_source_items", False ) )
hide_source_items=string_as_bool( payload.get( "hide_source_items", False ) ),
fields=payload.get( "fields", None ),
)
return params

Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3220,9 +3220,12 @@ def __init__(
id=None,
collection_type=None,
populated=True,
fields=None,
):
self.id = id
self.collection_type = collection_type
# TODO: persist fields...
self.fields = fields
if not populated:
self.populated_state = DatasetCollection.populated_states.NEW

Expand Down
5 changes: 4 additions & 1 deletion lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,10 @@ def handle_output( name, output, hidden=None ):
assert not element_identifiers # known_outputs must have been empty
element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
else:
element_kwds = dict(element_identifiers=element_identifiers)
element_kwds = dict(
element_identifiers=element_identifiers,
fields=output.structure.fields,
)

output_collections.create_collection(
output=output,
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/tools/parser/output_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def __init__(
self.collection_type_source = collection_type_source
self.structured_like = structured_like
self.dataset_collector_descriptions = dataset_collector_descriptions
self.fields = fields
if collection_type and collection_type_source:
raise ValueError("Cannot set both type and type_source on collection output.")
if collection_type is None and structured_like is None and dataset_collector_descriptions is None and collection_type_source is None:
Expand All @@ -219,7 +220,7 @@ def collection_prototype( self, inputs, type_registry ):
if self.structured_like:
collection_prototype = inputs[ self.structured_like ].collection
else:
collection_prototype = type_registry.prototype( self.collection_type )
collection_prototype = type_registry.prototype( self.collection_type, fields=self.fields )
return collection_prototype


Expand Down
Loading

0 comments on commit 2841da4

Please sign in to comment.