Skip to content

Commit

Permalink
Optimizations for dynamic output files collection.
Browse files Browse the repository at this point in the history
During the set hid and update quota parts of this operation - skip a bunch of extra flushes and skip checking for extra files since these cannot be created in conjunction with dynamic file discovery currently.

Cut this part of that operation from about 50 seconds to 7 when creating a collection with 1000 elements on my laptop against a local postgres database.
  • Loading branch information
jmchilton committed Jun 20, 2018
1 parent f9ba72f commit 12aa76d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
16 changes: 11 additions & 5 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,6 @@ def add_datasets(self, sa_session, datasets, parent_id=None, genome_build=None,
if quota and self.user:
disk_usage = sum([d.get_total_size() for d in datasets])
self.user.adjust_total_disk_usage(disk_usage)

sa_session.add_all(datasets)
if flush:
sa_session.flush()
Expand Down Expand Up @@ -1981,10 +1980,17 @@ def get_size(self, nice_size=False):
else:
return self._calculate_size()

def set_size(self):
"""Sets the size of the data on disk"""
def set_size(self, no_extra_files=False):
"""Sets the size of the data on disk.
If the caller is sure there are no extra files, pass no_extra_files as True to optimize subsequent
calls to get_total_size or set_total_size - potentially avoiding both a database flush and check against
the file system.
"""
if not self.file_size:
self.file_size = self._calculate_size()
if no_extra_files:
self.total_size = self.file_size

def get_total_size(self):
if self.total_size is not None:
Expand Down Expand Up @@ -2189,9 +2195,9 @@ def get_size(self, nice_size=False):
return galaxy.util.nice_size(self.dataset.get_size())
return self.dataset.get_size()

def set_size(self):
def set_size(self, **kwds):
"""Sets and gets the size of the data on disk"""
return self.dataset.set_size()
return self.dataset.set_size(**kwds)

def get_total_size(self):
return self.dataset.get_total_size()
Expand Down
6 changes: 4 additions & 2 deletions lib/galaxy/tools/parameters/output_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,8 @@ def create_dataset(
else:
primary_data.link_to(filename)

primary_data.set_size()
# We are sure there are no extra files, so optimize things that follow by setting total size also.
primary_data.set_size(no_extra_files=True)
# If match specified a name use otherwise generate one from
# designation.
primary_data.name = name
Expand Down Expand Up @@ -586,7 +587,8 @@ def collect_primary_datasets(tool, output, tool_provided_metadata, job_working_d
sa_session.flush()
# Move data from temp location to dataset location
app.object_store.update_from_file(primary_data.dataset, file_name=filename, create=True)
primary_data.set_size()
# We are sure there are no extra files, so optimize things that follow by setting total size also.
primary_data.set_size(no_extra_files=True)
# If match specified a name use otherwise generate one from
# designation.
primary_data.name = fields_match.name or "%s (%s)" % (outdata.name, designation)
Expand Down

0 comments on commit 12aa76d

Please sign in to comment.