From 1fa4ea2a8a1e363e51f6928c7a056c0d22ebb99c Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Tue, 22 Aug 2017 14:45:01 -0400
Subject: [PATCH] Allow multiple simultaneous uploads via single POST.

The upload.py tool itself already allowed this; update the upload dataset
grouping to handle it as well.
---
 lib/galaxy/tools/parameters/__init__.py |  7 +-
 lib/galaxy/tools/parameters/grouping.py | 87 ++++++++++++++++++-----
 test/api/test_tools.py                  | 93 +++++++++++++++++++++++++
 tools/data_source/upload.py             |  4 +-
 tools/data_source/upload.xml            |  5 ++
 5 files changed, 171 insertions(+), 25 deletions(-)

diff --git a/lib/galaxy/tools/parameters/__init__.py b/lib/galaxy/tools/parameters/__init__.py
index 5c5fe29fa32c..e39ae638e4be 100644
--- a/lib/galaxy/tools/parameters/__init__.py
+++ b/lib/galaxy/tools/parameters/__init__.py
@@ -298,11 +298,10 @@ def populate_state(request_context, inputs, incoming, state, errors={}, prefix='
         elif input.type == 'section':
             populate_state(request_context, input.inputs, incoming, group_state, errors, prefix=group_prefix, context=context, check=check)
         elif input.type == 'upload_dataset':
-            d_type = input.get_datatype(request_context, context=context)
-            writable_files = d_type.writable_files
-            while len(group_state) > len(writable_files):
+            file_count = input.get_file_count(request_context, context)
+            while len(group_state) > file_count:
                 del group_state[-1]
-            while len(writable_files) > len(group_state):
+            while file_count > len(group_state):
                 new_state = {'__index__' : len(group_state)}
                 for upload_item in input.inputs.values():
                     new_state[upload_item.name] = upload_item.get_initial_value(request_context, context)
diff --git a/lib/galaxy/tools/parameters/grouping.py b/lib/galaxy/tools/parameters/grouping.py
index 1531bcef5980..cf2301149274 100644
--- a/lib/galaxy/tools/parameters/grouping.py
+++ b/lib/galaxy/tools/parameters/grouping.py
@@ -219,17 +219,30 @@ def get_file_base_name(self, context):
         fd = context.get('files_metadata|base_name', 'Galaxy_Composite_file')
         return fd
 
-    def get_file_type(self, context):
-        return context.get(self.file_type_name, self.default_file_type)
-
-    def get_datatype_ext(self, trans, context):
-        ext = self.get_file_type(context)
+    def get_file_type(self, context, parent_context=None):
+        file_type = context.get(self.file_type_name, None)
+        if file_type == "":
+            if parent_context:
+                file_type = parent_context.get(self.file_type_name, self.default_file_type)
+            else:
+                file_type = self.default_file_type
+        return file_type
+
+    def get_dbkey(self, context, parent_context=None):
+        dbkey = context.get("dbkey", None)
+        if dbkey == "":
+            if parent_context:
+                dbkey = parent_context.get("dbkey", dbkey)
+        return dbkey
+
+    def get_datatype_ext(self, trans, context, parent_context=None):
+        ext = self.get_file_type(context, parent_context=parent_context)
         if ext in self.file_type_to_ext:
             ext = self.file_type_to_ext[ext]  # when using autodetect, we will use composite info from 'text', i.e. only the main file
         return ext
 
-    def get_datatype(self, trans, context):
-        ext = self.get_datatype_ext(trans, context)
+    def get_datatype(self, trans, context, parent_context=None):
+        ext = self.get_datatype_ext(trans, context, parent_context=parent_context)
         return trans.app.datatypes_registry.get_datatype_by_extension(ext)
 
     @property
@@ -249,6 +262,8 @@ def title_by_index(self, trans, index, context):
                 if composite_file.optional:
                     rval = "%s [optional]" % rval
                 return rval
+        if index < self.get_file_count(trans, context):
+            return "Extra primary file"
         return None
 
     def value_to_basic(self, value, app):
@@ -279,10 +294,14 @@ def value_from_basic(self, value, app, ignore_errors=False):
             rval.append(rval_dict)
         return rval
 
+    def get_file_count(self, trans, context):
+        file_count = context.get("file_count", "auto")
+        return len(self.get_datatype(trans, context).writable_files) if file_count == "auto" else int(file_count)
+
     def get_initial_value(self, trans, context):
-        d_type = self.get_datatype(trans, context)
+        file_count = self.get_file_count(trans, context)
         rval = []
-        for i, (composite_name, composite_file) in enumerate(d_type.writable_files.items()):
+        for i in range(file_count):
             rval_dict = {}
             rval_dict['__index__'] = i  # create __index__
             for input in self.inputs.values():
@@ -356,6 +375,8 @@ def get_one_filename(context):
             name = context.get('NAME', None)
             info = context.get('INFO', None)
             uuid = context.get('uuid', None) or None  # Turn '' to None
+            file_type = context.get('file_type', None)
+            dbkey = self.get_dbkey(context)
             warnings = []
             to_posix_lines = False
             if context.get('to_posix_lines', None) not in ["None", None, False]:
@@ -405,6 +426,10 @@ def get_one_filename(context):
             file_bunch.auto_decompress = auto_decompress
             file_bunch.space_to_tab = space_to_tab
             file_bunch.uuid = uuid
+            if file_type is not None:
+                file_bunch.file_type = file_type
+            if dbkey is not None:
+                file_bunch.dbkey = dbkey
             return file_bunch, warnings
 
         def get_filenames(context):
@@ -414,6 +439,8 @@ def get_filenames(context):
             uuid = context.get('uuid', None) or None  # Turn '' to None
             name = context.get('NAME', None)
             info = context.get('INFO', None)
+            file_type = context.get('file_type', None)
+            dbkey = self.get_dbkey(context)
             to_posix_lines = False
             if context.get('to_posix_lines', None) not in ["None", None, False]:
                 to_posix_lines = True
@@ -429,6 +456,11 @@
                 file_bunch.to_posix_lines = to_posix_lines
                 file_bunch.auto_decompress = auto_decompress
                 file_bunch.space_to_tab = space_to_tab
+                if file_type is not None:
+                    file_bunch.file_type = file_type
+                if dbkey is not None:
+                    file_bunch.dbkey = dbkey
+
                 rval.append(file_bunch)
             for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                 if file_bunch.path:
@@ -436,6 +468,11 @@
                     file_bunch.to_posix_lines = to_posix_lines
                     file_bunch.auto_decompress = auto_decompress
                     file_bunch.space_to_tab = space_to_tab
+                    if file_type is not None:
+                        file_bunch.file_type = file_type
+                    if dbkey is not None:
+                        file_bunch.dbkey = dbkey
+
                     rval.append(file_bunch)
             # look for files uploaded via FTP
             valid_files = []
@@ -474,15 +511,20 @@
                     file_bunch.to_posix_lines = to_posix_lines
                     file_bunch.auto_decompress = auto_decompress
                     file_bunch.space_to_tab = space_to_tab
+                    if file_type is not None:
+                        file_bunch.file_type = file_type
+                    if dbkey is not None:
+                        file_bunch.dbkey = dbkey
                     rval.append(file_bunch)
             return rval
         file_type = self.get_file_type(context)
+        file_count = self.get_file_count(trans, context)
         d_type = self.get_datatype(trans, context)
-        dbkey = context.get('dbkey', None)
+        dbkey = self.get_dbkey(context)
         tag_using_filenames = context.get('tag_using_filenames', False)
         writable_files = d_type.writable_files
         writable_files_offset = 0
-        groups_incoming = [None for _ in writable_files]
+        groups_incoming = [None for _ in range(file_count)]
         for group_incoming in context.get(self.name, []):
             i = int(group_incoming['__index__'])
             groups_incoming[i] = group_incoming
@@ -524,6 +566,10 @@ def get_filenames(context):
             dataset.to_posix_lines = file_bunch.to_posix_lines
             dataset.auto_decompress = file_bunch.auto_decompress
             dataset.space_to_tab = file_bunch.space_to_tab
+            if file_bunch.file_type:
+                dataset.file_type = file_type
+            if file_bunch.dbkey:
+                dataset.dbkey = dbkey
             dataset.warnings.extend(warnings)
             if dataset.primary_file is None:  # remove this before finish, this should create an empty dataset
                 raise Exception('No primary dataset file was available for composite upload')
@@ -544,15 +590,18 @@ def get_filenames(context):
                     dataset.warnings.append("A required composite file (%s) was not specified." % (key))
             return [dataset]
         else:
-            datasets = get_filenames(context[self.name][0])
             rval = []
-            for dataset in datasets:
-                dataset.file_type = file_type
-                dataset.datatype = d_type
-                dataset.ext = self.get_datatype_ext(trans, context)
-                dataset.dbkey = dbkey
-                dataset.tag_using_filenames = tag_using_filenames
-                rval.append(dataset)
+            for i, file_contexts in enumerate(context[self.name]):
+                datasets = get_filenames(file_contexts)
+                for dataset in datasets:
+                    override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
+                    d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
+                    dataset.file_type = override_file_type
+                    dataset.datatype = d_type
+                    dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
+                    dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
+                    dataset.tag_using_filenames = tag_using_filenames
+                    rval.append(dataset)
             return rval
diff --git a/test/api/test_tools.py b/test/api/test_tools.py
index 7b7e7cd878b9..ff90e019f564 100644
--- a/test/api/test_tools.py
+++ b/test/api/test_tools.py
@@ -185,6 +185,99 @@ def _get_roadmaps_content(self, history_id, dataset):
         roadmaps_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset, filename="Roadmaps")
         return roadmaps_content
 
+    def test_upload_dbkey(self):
+        with self.dataset_populator.test_history() as history_id:
+            payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")
+            run_response = self.dataset_populator.tools_post(payload)
+            self.dataset_populator.wait_for_tool_run(history_id, run_response)
+            datasets = run_response.json()["outputs"]
+            assert datasets[0].get("genome_build") == "hg19", datasets[0]
+
+    def test_upload_multiple_files_1(self):
+        with self.dataset_populator.test_history() as history_id:
+            payload = self.dataset_populator.upload_payload(history_id, "Test123",
+                                                            dbkey="hg19",
+                                                            extra_inputs={
+                                                                "files_1|url_paste": "SecondOutputContent",
+                                                                "files_1|NAME": "SecondOutputName",
+                                                                "files_1|file_type": "tabular",
+                                                                "files_1|dbkey": "hg18",
+                                                                "file_count": "2",
+                                                            }
+                                                            )
+            run_response = self.dataset_populator.tools_post(payload)
+            self.dataset_populator.wait_for_tool_run(history_id, run_response)
+            datasets = run_response.json()["outputs"]
+
+            assert len(datasets) == 2, datasets
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
+            assert content.strip() == "Test123"
+            assert datasets[0]["file_ext"] == "txt"
+            assert datasets[0]["genome_build"] == "hg19", datasets
+
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
+            assert content.strip() == "SecondOutputContent"
+            assert datasets[1]["file_ext"] == "tabular"
+            assert datasets[1]["genome_build"] == "hg18", datasets
+
+    def test_upload_multiple_files_2(self):
+        with self.dataset_populator.test_history() as history_id:
+            payload = self.dataset_populator.upload_payload(history_id, "Test123",
+                                                            file_type="tabular",
+                                                            dbkey="hg19",
+                                                            extra_inputs={
+                                                                "files_1|url_paste": "SecondOutputContent",
+                                                                "files_1|NAME": "SecondOutputName",
+                                                                "files_1|file_type": "txt",
+                                                                "files_1|dbkey": "hg18",
+                                                                "file_count": "2",
+                                                            }
+                                                            )
+            run_response = self.dataset_populator.tools_post(payload)
+            self.dataset_populator.wait_for_tool_run(history_id, run_response)
+            datasets = run_response.json()["outputs"]
+
+            assert len(datasets) == 2, datasets
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
+            assert content.strip() == "Test123"
+            assert datasets[0]["file_ext"] == "tabular", datasets
+            assert datasets[0]["genome_build"] == "hg19", datasets
+
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
+            assert content.strip() == "SecondOutputContent"
+            assert datasets[1]["file_ext"] == "txt"
+            assert datasets[1]["genome_build"] == "hg18", datasets
+
+    def test_upload_multiple_files_3(self):
+        with self.dataset_populator.test_history() as history_id:
+            payload = self.dataset_populator.upload_payload(history_id, "Test123",
+                                                            file_type="tabular",
+                                                            dbkey="hg19",
+                                                            extra_inputs={
+                                                                "files_0|file_type": "txt",
+                                                                "files_0|dbkey": "hg18",
+                                                                "files_1|url_paste": "SecondOutputContent",
+                                                                "files_1|NAME": "SecondOutputName",
+                                                                "files_1|file_type": "txt",
+                                                                "files_1|dbkey": "hg18",
+                                                                "file_count": "2",
+                                                            }
+                                                            )
+            run_response = self.dataset_populator.tools_post(payload)
+            self.dataset_populator.wait_for_tool_run(history_id, run_response)
+            datasets = run_response.json()["outputs"]
+
+            assert len(datasets) == 2, datasets
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
+            assert content.strip() == "Test123"
+            assert datasets[0]["file_ext"] == "txt", datasets
+            assert datasets[0]["genome_build"] == "hg18", datasets
+
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
+            assert content.strip() == "SecondOutputContent"
+            assert datasets[1]["file_ext"] == "txt"
+            assert datasets[1]["genome_build"] == "hg18", datasets
+
     def test_unzip_collection(self):
         with self.dataset_populator.test_history() as history_id:
             hdca_id = self.__build_pair(history_id, ["123", "456"])
diff --git a/tools/data_source/upload.py b/tools/data_source/upload.py
index 3011e5f9967a..a0445f3b4911 100644
--- a/tools/data_source/upload.py
+++ b/tools/data_source/upload.py
@@ -408,7 +408,7 @@ def __main__():
         dataset = util.bunch.Bunch(**safe_dict(dataset))
         try:
             output_path = output_paths[int(dataset.dataset_id)][0]
-        except:
+        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
         if dataset.type == 'composite':
@@ -422,7 +422,7 @@
     # parent directory is writable by the user.
     try:
         os.remove(sys.argv[3])
-    except:
+    except Exception:
         pass
diff --git a/tools/data_source/upload.xml b/tools/data_source/upload.xml
index d4d7795289d1..8d3fe1959c56 100644
--- a/tools/data_source/upload.xml
+++ b/tools/data_source/upload.xml
@@ -32,6 +32,7 @@
+
@@ -43,6 +44,10 @@
+
+
+
+
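
Example (not part of the patch): a minimal sketch of how a client could exercise the new behaviour by sending two pasted files to a Galaxy server's tool-run API in one POST. The flattened parameter names (file_count, dbkey, files_0|url_paste, files_1|file_type, files_1|dbkey, ...) mirror the payloads built by the new API tests above; the server URL, API key, and history id are placeholders, and the exact payload encoding may vary between Galaxy releases.

import json

import requests

GALAXY_URL = "https://galaxy.example.org"  # placeholder Galaxy instance
API_KEY = "REPLACE_ME"                     # placeholder API key
HISTORY_ID = "REPLACE_ME"                  # placeholder target history id

# Two datasets submitted in a single POST; the second file block overrides
# file_type and dbkey per file, as in test_upload_multiple_files_1.
inputs = {
    "file_count": "2",
    "file_type": "txt",
    "dbkey": "hg19",
    "files_0|url_paste": "Test123",
    "files_1|url_paste": "SecondOutputContent",
    "files_1|NAME": "SecondOutputName",
    "files_1|file_type": "tabular",
    "files_1|dbkey": "hg18",
}

payload = {
    "tool_id": "upload1",
    "history_id": HISTORY_ID,
    # Tool inputs are passed as a JSON-encoded mapping of flattened parameter names.
    "inputs": json.dumps(inputs),
}

response = requests.post(
    "%s/api/tools" % GALAXY_URL,
    params={"key": API_KEY},
    data=payload,
)
response.raise_for_status()
print(response.json()["outputs"])  # expect two new datasets, one per file block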