
Commit

Merge pull request galaxyproject#1209 from simonbray/test-from-invocation

Generate workflow test from invocation id
jmchilton authored Jan 5, 2022
2 parents b96b1bb + 518d9e0 commit a46ecdc
Showing 9 changed files with 172 additions and 19 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci.yaml
@@ -17,7 +17,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.6']
        python-version: ['3.7']
        tox-action:
          - lint
          - lint_docs
@@ -30,8 +30,8 @@ jobs:
          #- unit-diagnostic-servetraining
          #- unit-diagnostic-servecmd
          #- unit-diagnostic-trainingwfcmd
          - unit-nonredundant-noclientbuild-noshed-gx-2005
          - unit-nonredundant-noclientbuild-noshed-gx-2009
          - unit-nonredundant-noclientbuild-noshed-gx-2105
          - unit-nonredundant-noclientbuild-noshed-gx-2109
          - unit-nonredundant-noclientbuild-noshed-gx-dev
          - unit-nonredundant-noclientbuild-noshed
          - unit-diagnostic-serveclientcmd
13 changes: 13 additions & 0 deletions docs/best_practices_workflows.rst
@@ -91,6 +91,19 @@ with the ``workflow_test_init`` command.

    $ planemo workflow_test_init path/to/workflow.ga

This command creates a template test file, with inputs, parameters and expected outputs
left blank for you to fill in. If you've already run the workflow on an external Galaxy
server, you can generate a more complete test file directly from the invocation ID using
the ``--from_invocation`` option.

::

    $ planemo workflow_test_init --from_invocation <INVOCATION ID> --galaxy_url <GALAXY SERVER URL> --galaxy_user_key <GALAXY API KEY>

You also need to specify the server URL and your API key, as Galaxy invocation IDs are
only unique to a particular server. You can obtain the invocation ID from
``<GALAXY SERVER URL>/workflows/invocations``.
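
If you prefer to look up the invocation ID programmatically rather than through the
web interface, a minimal BioBlend sketch along the following lines should work (the
server URL and API key below are placeholders for your own values)::

    from bioblend.galaxy import GalaxyInstance

    # Placeholder credentials; substitute your own Galaxy server and API key.
    gi = GalaxyInstance(url="https://usegalaxy.org", key="<GALAXY API KEY>")

    # List recent workflow invocations for your user and pick the one to test from.
    for invocation in gi.invocations.get_invocations():
        print(invocation["id"], invocation["workflow_id"], invocation["state"])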

Publishing
----------

23 changes: 20 additions & 3 deletions planemo/commands/cmd_workflow_job_init.py
@@ -1,17 +1,27 @@
"""Module describing the planemo ``workflow_job_init`` command."""
import os

import click
import yaml

from planemo import options
from planemo.cli import command_function
from planemo.galaxy.workflows import job_template, new_workflow_associated_path
from planemo.galaxy.workflows import (
    get_workflow_from_invocation_id,
    job_template,
    new_workflow_associated_path
)
from planemo.io import can_write_to_path


@click.command('workflow_job_init')
@options.required_workflow_arg()
@options.force_option()
@options.workflow_output_artifact()
@options.galaxy_url_option()
@options.galaxy_user_key_option()
@options.from_invocation()
@options.profile_option()
@command_function
def cli(ctx, workflow_identifier, output=None, **kwds):
    """Initialize a Galaxy workflow job description for supplied workflow.
@@ -25,9 +35,16 @@ def cli(ctx, workflow_identifier, output=None, **kwds):
    as well so this command may be renamed to job_init or something along those
    lines at some point.
    """
    job = job_template(workflow_identifier)
    if kwds["from_invocation"]:
        if not os.path.isdir('test-data'):
            ctx.log("Creating test-data directory.")
            os.makedirs("test-data")
        path_basename = get_workflow_from_invocation_id(workflow_identifier, kwds["galaxy_url"], kwds["galaxy_user_key"])

    job = job_template(workflow_identifier, **kwds)

    if output is None:
        output = new_workflow_associated_path(workflow_identifier, suffix="job")
        output = new_workflow_associated_path(path_basename if kwds["from_invocation"] else workflow_identifier, suffix="job")
    if not can_write_to_path(output, **kwds):
        ctx.exit(1)
    with open(output, "w") as f_job:
23 changes: 17 additions & 6 deletions planemo/commands/cmd_workflow_test_init.py
@@ -7,6 +7,7 @@
from planemo import options
from planemo.cli import command_function
from planemo.galaxy.workflows import (
    get_workflow_from_invocation_id,
    job_template,
    new_workflow_associated_path,
    output_stubs_for_workflow,
@@ -19,6 +20,10 @@
@options.force_option()
@options.workflow_output_artifact()
@options.split_job_and_test()
@options.galaxy_url_option()
@options.galaxy_user_key_option()
@options.from_invocation()
@options.profile_option()
@command_function
def cli(ctx, workflow_identifier, output=None, split_test=False, **kwds):
"""Initialize a Galaxy workflow test description for supplied workflow.
Expand All @@ -27,21 +32,27 @@ def cli(ctx, workflow_identifier, output=None, split_test=False, **kwds):
to ensure inputs and outputs comply with best practices that make workflow
testing easier.
"""
    path_basename = os.path.basename(workflow_identifier)
    job = job_template(workflow_identifier)
    if kwds["from_invocation"]:
        if not os.path.isdir('test-data'):
            ctx.log("Creating test-data directory.")
            os.makedirs("test-data")
        path_basename = get_workflow_from_invocation_id(workflow_identifier, kwds["galaxy_url"], kwds["galaxy_user_key"])
    else:
        path_basename = os.path.basename(workflow_identifier)
    job = job_template(workflow_identifier, **kwds)
    if output is None:
        output = new_workflow_associated_path(workflow_identifier)
        job_output = new_workflow_associated_path(workflow_identifier, suffix="job1")
        output = new_workflow_associated_path(path_basename if kwds["from_invocation"] else workflow_identifier)
        job_output = new_workflow_associated_path(path_basename if kwds["from_invocation"] else workflow_identifier, suffix="job1")
    if not can_write_to_path(output, **kwds):
        ctx.exit(1)

    test_description = [{
        'doc': 'Test outline for %s' % path_basename,
        'job': job,
        'outputs': output_stubs_for_workflow(workflow_identifier),
        'outputs': output_stubs_for_workflow(workflow_identifier, **kwds),
    }]
    if split_test:
        job_output = new_workflow_associated_path(workflow_identifier, suffix="job1")
        job_output = new_workflow_associated_path(path_basename if kwds["from_invocation"] else workflow_identifier, suffix="job1")
        if not can_write_to_path(job_output, **kwds):
            ctx.exit(1)

90 changes: 88 additions & 2 deletions planemo/galaxy/workflows.py
@@ -12,6 +12,7 @@
from gxformat2.interface import ImporterGalaxyInterface
from gxformat2.normalize import inputs_normalized, outputs_normalized

from planemo.galaxy.api import gi
from planemo.io import warn

FAILED_REPOSITORIES_MESSAGE = "Failed to install one or more repositories."
@@ -201,23 +202,28 @@ def output_labels(workflow_path):
    return [o["id"] for o in outputs]


def output_stubs_for_workflow(workflow_path):
def output_stubs_for_workflow(workflow_path, **kwds):
    """
    Return output labels and class.
    """
    if kwds.get("from_invocation"):
        return _job_outputs_template_from_invocation(workflow_path, kwds["galaxy_url"], kwds["galaxy_user_key"])
    outputs = {}
    for label in output_labels(workflow_path):
        if not label.startswith('_anonymous_'):
            outputs[label] = {'class': ''}
    return outputs


def job_template(workflow_path):
def job_template(workflow_path, **kwds):
    """Return a job template for specified workflow.

    A dictionary describing non-optional inputs that must be specified to
    run the workflow.
    """
    if kwds.get("from_invocation"):
        return _job_inputs_template_from_invocation(workflow_path, kwds["galaxy_url"], kwds["galaxy_user_key"])

    template = {}
    for required_input_step in required_input_steps(workflow_path):
        i_label = input_label(required_input_step)
@@ -280,6 +286,86 @@ def rewrite_job_file(input_file, output_file, job):
        yaml.dump(job_contents, f)


def get_workflow_from_invocation_id(invocation_id, galaxy_url, galaxy_api_key):
    user_gi = gi(url=galaxy_url, key=galaxy_api_key)
    workflow_id = user_gi.invocations.show_invocation(invocation_id)['workflow_id']
    workflow = user_gi.workflows._get(workflow_id, params={'instance': 'true'})
    workflow_name = '-'.join(workflow["name"].split())
    user_gi.workflows.export_workflow_to_local_path(use_default_filename=False, file_local_path=f'./{workflow_name}.ga', workflow_id=workflow["id"])

    return workflow_name
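
For orientation, a hypothetical direct call to the helper above (the invocation ID, server
URL and API key are placeholders) exports ``<workflow name>.ga`` to the current directory
and returns the dash-joined workflow name::

    from planemo.galaxy.workflows import get_workflow_from_invocation_id

    # Placeholder values; use a real invocation ID, Galaxy URL and API key.
    name = get_workflow_from_invocation_id(
        "0123456789abcdef",
        galaxy_url="https://usegalaxy.org",
        galaxy_api_key="<GALAXY API KEY>",
    )
    print(f"Workflow exported to ./{name}.ga")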


def _job_inputs_template_from_invocation(invocation_id, galaxy_url, galaxy_api_key):
    def _template_from_collection(user_gi, collection_id):
        collection = user_gi.dataset_collections.show_dataset_collection(collection_id)
        template = {
            "class": "Collection",
            "collection_type": collection["collection_type"],
            "elements": []
        }
        for element in collection["elements"]:
            if element["element_type"] == "hdca":
                template['elements'].append(_template_from_collection(user_gi, element["object"]["id"]))
            elif element["element_type"] == "hda":
                user_gi.datasets.download_dataset(element["object"]["id"], use_default_filename=False,
                                                  file_path=f"test-data/{input_step['label']}_{element['element_identifier']}.{ext}")
                template['elements'].append(
                    {
                        "class": "File",
                        "identifier": element['element_identifier'],
                        "path": f"test-data/{input_step['label']}_{element['element_identifier']}.{ext}",
                    }
                )
        return template

    user_gi = gi(url=galaxy_url, key=galaxy_api_key)
    invocation = user_gi.invocations.show_invocation(invocation_id)
    template = {}
    for input_step in invocation['inputs'].values():
        if input_step["src"] == "hda":
            ext = user_gi.datasets.show_dataset(input_step["id"])["extension"]
            user_gi.datasets.download_dataset(input_step["id"], use_default_filename=False, file_path=f"test-data/{input_step['label']}.{ext}")
            template[input_step['label']] = {
                "class": "File",
                "path": f"test-data/{input_step['label']}.{ext}",
                "filetype": ext
            }
        elif input_step["src"] == "hdca":
            template[input_step['label']] = _template_from_collection(user_gi, input_step["id"])
    for param, param_step in invocation['input_step_parameters'].items():
        template[param] = param_step["parameter_value"]

    return template


def _job_outputs_template_from_invocation(invocation_id, galaxy_url, galaxy_api_key):
    user_gi = gi(url=galaxy_url, key=galaxy_api_key)
    invocation = user_gi.invocations.show_invocation(invocation_id)
    outputs = {}
    for label, output in invocation["outputs"].items():
        ext = user_gi.datasets.show_dataset(output["id"])["extension"]
        user_gi.datasets.download_dataset(output["id"], use_default_filename=False, file_path=f"test-data/{label}.{ext}")
        outputs[label] = {
            'file': f"test-data/{label}.{ext}"
        }
    for label, output in invocation["output_collections"].items():
        collection = user_gi.dataset_collections.show_dataset_collection(output['id'])
        if ':' not in collection["collection_type"]:
            user_gi.datasets.download_dataset(collection["elements"][0]["object"]["id"], use_default_filename=False,
                                              file_path=f"test-data/{label}.{collection['elements'][0].get('extension', 'txt')}")
            outputs[label] = {
                'element_tests': {  # only check the first element
                    collection["elements"][0]["element_identifier"]: f"test-data/{label}.{collection['elements'][0]['extension']}"
                }
            }
        else:
            outputs[label] = {
                'element_tests': 'nested_collection_todo'
            }
    return outputs
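
Taken together, these helpers supply the ``job`` (inputs and parameters) and ``outputs``
sections of the test description that ``workflow_test_init --from_invocation`` writes. A
purely illustrative result for an invocation with one dataset input, one integer parameter
and one dataset output (labels, extensions and paths depend entirely on the workflow that
was run) might look like::

    # Hypothetical structures returned by the two template helpers above.
    job = {
        "input_fastq": {
            "class": "File",
            "path": "test-data/input_fastq.fastqsanger",
            "filetype": "fastqsanger",
        },
        "min_length": 20,
    }
    outputs = {
        "trimmed_fastq": {"file": "test-data/trimmed_fastq.fastqsanger"},
    }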


__all__ = (
    "import_workflow",
    "describe_outputs",
12 changes: 12 additions & 0 deletions planemo/options.py
@@ -727,6 +727,18 @@ def split_job_and_test():
    return click.option("--split_test/--no_split_test", default=False, help="Write workflow job and test definitions to separate files.")


def from_invocation():
    return planemo_option(
        "--from_invocation/--from_uri",
        is_flag=True,
        default=False,
        help="Build a workflow test or job description from an invocation ID run on an external Galaxy. "
             "A Galaxy URL and API key must also be specified. This allows test data to be downloaded "
             "and inputs and parameters defined automatically. Alternatively, the default is to build the "
             "descriptions from a provided workflow URI."
    )


def required_job_arg():
"""Decorate click method as requiring the path to a single tool.
"""
6 changes: 5 additions & 1 deletion planemo/virtualenv.py
@@ -22,5 +22,9 @@ def create_command(virtualenv_path, galaxy_python_version=None):
        python = os.path.abspath(python)
    else:
        python = sys.executable or 'python'
    command = [python, '-m', 'venv', virtualenv_path]
    virtualenv_on_path = which('virtualenv')
    if virtualenv_on_path:
        command = [virtualenv_on_path, virtualenv_path, '-p', python]
    else:
        command = [python, '-m', 'venv', virtualenv_path]
    return " ".join(command)
12 changes: 11 additions & 1 deletion tests/test_external_galaxy_commands.py
@@ -38,6 +38,8 @@ def test_plain_init(self):
        rerun_cmd = ["rerun", "--invocation", "invocation_id", "--profile", "test_ext_profile"]
        upload_data_cmd = ["upload_data", "test_wf_alias", os.path.join(TEST_DATA_DIR, "wf2-job.yml"), "new-job.yml",
                           "--profile", "test_ext_profile"]
        workflow_test_init_cmd = ["workflow_test_init", "invocation_id", "--from_invocation", "--profile", "test_ext_profile"]
        test_workflow_test_init_cmd = ["test", "TestWorkflow1.ga", "--profile", "test_ext_profile"]

        # test alias and profile creation
        result = self._check_exit_code(profile_list_cmd)
@@ -69,10 +71,18 @@ def test_plain_init(self):
        assert '1 jobs ok' in result.output or '"ok": 1' in result.output  # so it passes regardless if tabulate is installed or not

        # test rerun
        rerun_cmd[2] = config.user_gi.workflows.get_invocations(wfid)[0]['id']
        invocation_id = config.user_gi.workflows.get_invocations(wfid)[0]['id']
        rerun_cmd[2] = invocation_id
        result = self._check_exit_code(rerun_cmd)
        assert 'No jobs matching the specified invocation' in result.output

        # test generating test case from invocation_id
        workflow_test_init_cmd[1] = invocation_id
        self._check_exit_code(workflow_test_init_cmd)
        assert os.path.exists('TestWorkflow1.ga')
        assert os.path.exists('TestWorkflow1-tests.yml')
        self._check_exit_code(test_workflow_test_init_cmd)

        # test alias and profile deletion
        result = self._check_exit_code(alias_delete_cmd)
        assert 'Alias test_wf_alias was successfully deleted from profile test_ext_profile' in result.output
6 changes: 3 additions & 3 deletions tox.ini
@@ -56,9 +56,9 @@ setenv =
    quick: PLANEMO_SKIP_GALAXY_TESTS=1
    master: PLANEMO_TEST_GALAXY_BRANCH=master
    dev: PLANEMO_TEST_GALAXY_BRANCH=dev
    2009: PLANEMO_TEST_GALAXY_BRANCH=release_20.09
    2005: PLANEMO_TEST_GALAXY_BRANCH=release_20.05
    2001: PLANEMO_TEST_GALAXY_BRANCH=release_20.01
    2109: PLANEMO_TEST_GALAXY_BRANCH=release_21.09
    2105: PLANEMO_TEST_GALAXY_BRANCH=release_21.05
    2101: PLANEMO_TEST_GALAXY_BRANCH=release_21.01
skip_install =
    doc_test,lint,lint_docs,lint_docstrings,mypy,gxwf_test_test: True
whitelist_externals =
