forked from galaxyproject/galaxy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Track workflow step input definitions in our model.
We don't track workflow step inputs in any formal way in our model currently. This has resulted in some current hacks and prevents future enhancements. This commit splits WorkflowStepConnection into two models WorkflowStepInput and WorkflowStepConnection - normalizing the previous table workflow_step_connection on input step and input name. In terms of current hacks forced on it by restricting all of tool state to be confined to a big JSON blob in the database - we have problems distinguishing keys and values when walking tool state. As we store more and more JSON blobs inside of the giant tool state blob - the worse this problem gets. Take for instance checking for runtime parameters or the rules parameter values - these both use JSON blobs that aren't simple values, so it is hard to tell looking at the tool state blob in the database or the workflow export to tell what is a key or what is a value. Tracking state as normalized inputs with default values and explicit attributes runtime values should allow much more percise state definition and construction. This variant of the models would also potentially allow defining runtime values with non-tool default values (so default values defined for the workflow but still explicitly settable at runtime). The combinations of overriding defaults and defining runtime values were not representable before. In terms of future enhancements, there is a lot we cannot track with the current models - such as map/reduce options for collection operations (galaxyproject#4623 (comment)). This should enable a lot of that. Obviously there are a lot of attributes defined here that are not yet utilized, but I'm using most (all?) of them downstream in the CWL branch. I'd rather populate this table fully realized and fill in the implementation around it as work continues to stream in from the CWL branch - to keep things simple and avoid extra database migrations. 
But I understand if this feels like speculative complexity we want to avoid despite the implementation being readily available for inspection downstream.
- Loading branch information
Showing
5 changed files
with
199 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
105 changes: 105 additions & 0 deletions
105
lib/galaxy/model/migrate/versions/0144_add_workflow_step_input.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
""" | ||
Migration script for workflow step input table. | ||
""" | ||
from __future__ import print_function | ||
|
||
import logging | ||
|
||
from sqlalchemy import Boolean, Column, ForeignKey, Integer, MetaData, TEXT, Table | ||
|
||
from galaxy.model.custom_types import JSONType | ||
|
||
log = logging.getLogger(__name__) | ||
metadata = MetaData() | ||
|
||
|
||
def get_new_tables():
    """Build (without creating) the two tables replacing the legacy schema.

    The old ``workflow_step_connection`` table is normalized into
    ``workflow_step_input`` (one row per step input name, carrying default /
    runtime-value metadata) plus a slimmer ``workflow_step_connection`` that
    references those input rows.  Returns the tables in creation order.
    """
    step_input_table = Table(
        "workflow_step_input", metadata,
        Column("id", Integer, primary_key=True),
        Column("workflow_step_id", Integer, ForeignKey("workflow_step.id"), index=True),
        Column("name", TEXT),
        Column("merge_type", TEXT),
        Column("scatter_type", TEXT),
        Column("value_from", JSONType),
        Column("value_from_type", TEXT),
        Column("default_value", JSONType),
        Column("default_value_set", Boolean, default=False),
        Column("runtime_value", Boolean, default=False),
    )

    step_connection_table = Table(
        "workflow_step_connection", metadata,
        Column("id", Integer, primary_key=True),
        Column("output_step_id", Integer, ForeignKey("workflow_step.id"), index=True),
        Column("input_step_input_id", Integer, ForeignKey("workflow_step_input.id"), index=True),
        Column("output_name", TEXT),
        Column("input_subworkflow_step_id", Integer, ForeignKey("workflow_step.id"), index=True),
    )

    # workflow_step_input must exist before workflow_step_connection, which
    # holds a foreign key into it.
    return [step_input_table, step_connection_table]
|
||
|
||
def upgrade(migrate_engine):
    """Split the legacy workflow_step_connection table into the normalized pair.

    Steps: rename the legacy table out of the way (kept as a backup so
    ``downgrade`` can restore it), create the two replacement tables, then
    copy the legacy rows across with plain SQL.
    """
    metadata.bind = migrate_engine
    print(__doc__)
    metadata.reflect()

    LegacyWorkflowStepConnection_table = Table("workflow_step_connection", metadata, autoload=True)
    # Drop the indexes first - they would otherwise collide with the
    # identically named indexes created for the replacement tables.
    for index in LegacyWorkflowStepConnection_table.indexes:
        index.drop()
    LegacyWorkflowStepConnection_table.rename("workflow_step_connection_premigrate144")
    # Try to deregister that table to work around some caching problems it seems.
    LegacyWorkflowStepConnection_table.deregister()
    metadata._remove_table("workflow_step_connection", metadata.schema)

    metadata.reflect()
    tables = get_new_tables()
    for table in tables:
        __create(table)

    # One workflow_step_input row per distinct (input step, input name) pair.
    # DISTINCT on input_step_id (not the legacy row id - the join below
    # matches on wsc.input_step_id = wsi.workflow_step_id, so inserting the
    # legacy connection id here would leave every connection unmatched, and
    # a non-distinct insert would duplicate inputs with multiple connections).
    insert_step_inputs_cmd = \
        "INSERT INTO workflow_step_input (workflow_step_id, name) " + \
        "SELECT DISTINCT input_step_id, input_name FROM workflow_step_connection_premigrate144"

    migrate_engine.execute(insert_step_inputs_cmd)

    # Re-point each legacy connection at its newly created input row.
    # TODO: verify order here.
    insert_step_connections_cmd = \
        "INSERT INTO workflow_step_connection (output_step_id, input_step_input_id, output_name, input_subworkflow_step_id) " + \
        "SELECT wsc.output_step_id, wsi.id, wsc.output_name, wsc.input_subworkflow_step_id " + \
        "FROM workflow_step_connection_premigrate144 as wsc left outer join workflow_step_input as wsi on wsc.input_step_id = wsi.workflow_step_id and wsc.input_name = wsi.name ORDER BY wsc.id"

    migrate_engine.execute(insert_step_connections_cmd)
|
||
|
||
def downgrade(migrate_engine):
    """Drop the normalized tables and restore the pre-migration connection table."""
    metadata.bind = migrate_engine

    tables = get_new_tables()
    for table in tables:
        __drop(table)

    # Forget the (now dropped) new workflow_step_connection definition so the
    # renamed legacy table can take the name back after the reflect below.
    metadata._remove_table("workflow_step_connection", metadata.schema)
    metadata.reflect()

    # Restore the backed-up legacy connection table under its original name.
    LegacyWorkflowStepConnection_table = Table("workflow_step_connection_premigrate144", metadata, autoload=True)
    LegacyWorkflowStepConnection_table.rename("workflow_step_connection")
|
||
|
||
def __create(table):
    """Best-effort creation of ``table``: failures are logged, never raised,
    so one bad table does not abort the rest of the migration."""
    try:
        table.create()
    except Exception:
        log.exception("Creating %s table failed.", table.name)
|
||
|
||
def __drop(table):
    """Best-effort drop of ``table``: failures are logged, never raised,
    mirroring ``__create`` so downgrades proceed past missing tables."""
    try:
        table.drop()
    except Exception:
        log.exception("Dropping %s table failed.", table.name)