User-based ObjectStore #4314

Closed
wants to merge 36 commits
Changes from 1 commit

Commits (36)
0b169c7
Replaced most of the direct calls to S3 with their equivalence from C…
VJalili Feb 13, 2017
ea71efb
Replaced most of the direct calls to S3 with their equivalence from C…
VJalili Feb 13, 2017
143f84f
A reformatting.
VJalili Feb 15, 2017
7ec4306
Initial commit of OpenID Connect Provider.
VJalili Apr 27, 2017
714a071
Initial commit of OpenID Connect Provider.
VJalili Apr 28, 2017
747ad3f
Added a TODO comment.
VJalili May 2, 2017
f0bda14
Updates to both oidc and s3.
VJalili May 5, 2017
b801a84
Updates to both oidc and s3.
VJalili May 22, 2017
e2e4566
Renamed S3ObjectStore to CloudObjectStore
VJalili May 22, 2017
da87d16
Renamed S3 to Cloud.
VJalili May 22, 2017
b1e80fa
Drafted a user-based ObjectStore structure.
VJalili May 23, 2017
8b16182
1. Updated a comment.
VJalili May 24, 2017
b385f3d
1. Added user to create function call in job.
VJalili May 25, 2017
d764ec7
- Propagated 'User' from HDA through Dataset to ObjectStore, for some…
VJalili May 26, 2017
5980ee3
- Added user object to all the classes which need to access a user-ba…
VJalili Jun 1, 2017
198b4ca
change the design 'who passes user to objectstore':
VJalili Jun 2, 2017
1d4bc21
change the design 'who passes user to objectstore':
VJalili Jun 2, 2017
77b5d7a
Introduced PluggedMedia;
VJalili Jun 15, 2017
520cea5
- Removed OIDC idp code from this branch.
VJalili Jun 15, 2017
9699790
- Removed the remaining OIDC idp code from this branch.
VJalili Jun 15, 2017
6e8e671
Propogated user and pluggedMedia info when creating a new dataset as …
VJalili Jun 19, 2017
004ea3f
1. Propogated User and PluggedMedia through a dataset delete/purge pr…
VJalili Jun 20, 2017
634bc55
Fixed some bugs occurring at a dataset deletion and download.
VJalili Jun 21, 2017
66ec187
1. make user and pluggedMedia as optional parameters so that other in…
VJalili Jul 13, 2017
2774824
some clean-up and separating OIDC-idp from this branch.
VJalili Jul 13, 2017
bf08b33
some clean-up and separating OIDC-idp from this branch.
VJalili Jul 13, 2017
8f31f76
some clean-up and separating OIDC-idp from this branch.
VJalili Jul 13, 2017
140a194
Merge branch 'master' into UserBasedObjectStore
VJalili Jul 13, 2017
9176836
Merge remote-tracking branch 'remotes/main/dev' into UserBasedObjectS…
VJalili Jul 13, 2017
32f806e
Fixed db migration number.
VJalili Jul 13, 2017
5f1cf27
Added missing commits for galaxy/jobs
VJalili Jul 13, 2017
337b3a0
Fixed a bug with galaxy/jobs; and renamed a confusing dataset argumen…
VJalili Jul 13, 2017
398bc78
Reverted the change on info.txt
VJalili Jul 14, 2017
a9f2165
Refactoring `pluggedMedia` to `plugged_media`
VJalili Jul 14, 2017
c4f3c96
Fixed a bug happended after refactoring.
VJalili Jul 14, 2017
a10b564
Reverted the formating fixes in mappings.py
VJalili Jul 17, 2017
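
Taken together, the commits above thread the owning user and a selected plugged media through the ObjectStore calls. The sketch below condenses that calling convention as it appears in the lib/galaxy/jobs/__init__.py hunks further down; the keyword arguments `user` and `pluggedMedia` and the temporary first-match media selection come from the diff, while the standalone function and its name are illustrative only, not code from the branch:

    # Illustrative sketch only: the user/pluggedMedia-aware ObjectStore calling
    # pattern this branch introduces, condensed from the job working-directory
    # code shown in the diff below.
    def create_job_working_directory(app, job):
        # Temporary first-match selection of the user's plugged media
        # (mirrors the "TEMP BLOCK" in the diff).
        plugged_media = None
        for pm in job.user.pluggedMedia:
            plugged_media = pm
            break
        app.object_store.create(
            job, user=job.user, pluggedMedia=plugged_media,
            base_dir='job_work', dir_only=True, obj_dir=True)
        return app.object_store.get_filename(
            job, user=job.user, pluggedMedia=plugged_media,
            base_dir='job_work', dir_only=True, obj_dir=True)
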
Merge remote-tracking branch 'remotes/main/dev' into UserBasedObjectStore

# Conflicts:
#	doc/source/lib/galaxy.objectstore.rst
#	doc/source/slideshow/architecture/images/objectstore.plantuml.svg
#	lib/galaxy/jobs/__init__.py
VJalili committed Jul 13, 2017
commit 9176836cdb3d3ef9cad429c7fa281c426a9d34e9
42 changes: 5 additions & 37 deletions doc/source/lib/galaxy.objectstore.rst
@@ -9,40 +9,8 @@ galaxy\.objectstore package
Submodules
----------

galaxy.objectstore.pulsar module
--------------------------------

.. automodule:: galaxy.objectstore.pulsar
:members:
:undoc-members:
:show-inheritance:

galaxy.objectstore.rods module
------------------------------

.. automodule:: galaxy.objectstore.rods
:members:
:undoc-members:
:show-inheritance:

galaxy.objectstore.cloud module
----------------------------

.. automodule:: galaxy.objectstore.cloud
:members:
:undoc-members:
:show-inheritance:

galaxy.objectstore.s3_multipart_upload module
---------------------------------------------

.. automodule:: galaxy.objectstore.s3_multipart_upload
:members:
:undoc-members:
:show-inheritance:

galaxy.objectstore.azure_blob module
---------------------------------------------
galaxy\.objectstore\.azure\_blob module
---------------------------------------

.. automodule:: galaxy.objectstore.azure_blob
:members:
@@ -65,10 +33,10 @@ galaxy\.objectstore\.rods module
:undoc-members:
:show-inheritance:

galaxy\.objectstore\.s3 module
------------------------------
galaxy.objectstore.cloud module
----------------------------

.. automodule:: galaxy.objectstore.s3
.. automodule:: galaxy.objectstore.cloud
:members:
:undoc-members:
:show-inheritance:
171 changes: 1 addition & 170 deletions doc/source/slideshow/architecture/images/objectstore.plantuml.svg
(Binary/SVG image diff not rendered in this view.)
83 changes: 40 additions & 43 deletions lib/galaxy/jobs/__init__.py
@@ -902,16 +902,10 @@ def get_special( ):
def _create_working_directory( self ):
job = self.get_job()
try:
# TEMP BLOCK --- START
pluggedMedia = None
for pM in job.user.pluggedMedia:
pluggedMedia = pM
break
# TEMP BLOCK --- END
self.app.object_store.create(
job, user=job.user, pluggedMedia=pluggedMedia, base_dir='job_work', dir_only=True, obj_dir=True)
job, base_dir='job_work', dir_only=True, obj_dir=True )
self.working_directory = self.app.object_store.get_filename(
job, user=job.user, pluggedMedia=pluggedMedia, base_dir='job_work', dir_only=True, obj_dir=True)
job, base_dir='job_work', dir_only=True, obj_dir=True )

# The tool execution is given a working directory beneath the
# "job" working directory.
@@ -1005,13 +999,11 @@ def fail( self, message, exception=False, stdout="", stderr="", exit_code=None )
dataset.blurb = 'tool error'
dataset.info = message
dataset.set_size()
dataset.dataset.set_total_size( user=job.user, pluggedMedia=dataset.pluggedMedia )
dataset.dataset.set_total_size()
dataset.mark_unhidden()
if dataset.ext == 'auto':
dataset.extension = 'data'
# Update (non-library) job output datasets through the object store
if dataset not in job.output_library_datasets:
self.app.object_store.update_from_file( dataset.dataset, user=job.user, pluggedMedia=dataset.pluggedMedia, create=True )
self.__update_output(job, dataset)
# Pause any dependent jobs (and those jobs' outputs)
for dep_job_assoc in dataset.dependent_jobs:
self.pause( dep_job_assoc.job, "Execution of this dataset's job is paused because its input datasets are in an error state." )
@@ -1222,19 +1214,20 @@ def finish(
# should this also be checking library associations? - can a library item be added from a history before the job has ended? -
# lets not allow this to occur
# need to update all associated output hdas, i.e. history was shared with job running
#TODO: the following object called `dataset`, should be renamed to `hda`.
for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations:
trynum = 0
while trynum < self.app.config.retry_job_output_collection:
try:
# Attempt to short circuit NFS attribute caching
os.stat( dataset.dataset.get_file_name( job.user ) )
os.chown( dataset.dataset.get_file_name( job.user ), os.getuid(), -1 )
trynum = self.app.config.retry_job_output_collection
except ( OSError, ObjectNotFound ) as e:
trynum += 1
log.warning( 'Error accessing %s, will retry: %s', dataset.dataset.get_file_name( job.user ), e )
time.sleep( 2 )
purged = dataset.dataset.purged
if not purged and dataset.dataset.external_filename is None:
trynum = 0
while trynum < self.app.config.retry_job_output_collection:
try:
# Attempt to short circuit NFS attribute caching
os.stat( dataset.dataset.file_name )
os.chown( dataset.dataset.file_name, os.getuid(), -1 )
trynum = self.app.config.retry_job_output_collection
except ( OSError, ObjectNotFound ) as e:
trynum += 1
log.warning( 'Error accessing %s, will retry: %s', dataset.dataset.file_name, e )
time.sleep( 2 )
if getattr( dataset, "hidden_beneath_collection_instance", None ):
dataset.visible = False
dataset.blurb = 'done'
@@ -1252,8 +1245,20 @@ def finish(
dataset.dataset.uuid = context['uuid']
# Update (non-library) job output datasets through the object store
if dataset not in job.output_library_datasets:
self.app.object_store.update_from_file(dataset.dataset, user=job.user, pluggedMedia=dataset.pluggedMedia, create=True)
self._collect_extra_files(dataset.dataset, self.working_directory)
self.app.object_store.update_from_file(dataset.dataset, create=True)
self.__update_output(job, dataset)
if not purged:
self._collect_extra_files(dataset.dataset, self.working_directory)
# Handle composite datatypes of auto_primary_file type
if dataset.datatype.composite_type == 'auto_primary_file' and not dataset.has_data():
try:
with NamedTemporaryFile() as temp_fh:
temp_fh.write( dataset.datatype.generate_primary_file( dataset ) )
temp_fh.flush()
self.app.object_store.update_from_file( dataset.dataset, file_name=temp_fh.name, create=True )
dataset.set_size()
except Exception as e:
log.warning( 'Unable to generate primary composite file automatically for %s: %s', dataset.dataset.id, e )
if job.states.ERROR == final_job_state:
dataset.blurb = "error"
dataset.mark_unhidden()
@@ -1270,7 +1275,7 @@ def finish(
if retry_internally and not self.external_output_metadata.external_metadata_set_successfully(dataset, self.sa_session ):
# If Galaxy was expected to sniff type and didn't - do so.
if dataset.ext == "_sniff_":
extension = sniff.handle_uploaded_dataset_file( dataset.dataset.get_file_name( job.user ), self.app.datatypes_registry )
extension = sniff.handle_uploaded_dataset_file( dataset.dataset.file_name, self.app.datatypes_registry )
dataset.extension = extension

# call datatype.set_meta directly for the initial set_meta call during dataset creation
@@ -1306,7 +1311,7 @@ def path_rewriter( path ):
else:
dataset.set_peek( line_count=context['line_count'] )
except:
if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte( user=job.user, pluggedMedia=dataset.pluggedMedia ) ) or self.tool.is_multi_byte:
if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
dataset.set_peek( is_multi_byte=True )
else:
dataset.set_peek()
@@ -1401,8 +1406,9 @@ def path_rewriter( path ):
collected_bytes = 0
# Once datasets are collected, set the total dataset size (includes extra files)
for dataset_assoc in job.output_datasets:
dataset_assoc.dataset.dataset.set_total_size( user=job.user, pluggedMedia=dataset.pluggedMedia )
collected_bytes += dataset_assoc.dataset.dataset.get_total_size( user=job.user, pluggedMedia=dataset.pluggedMedia )
if not dataset_assoc.dataset.dataset.purged:
dataset_assoc.dataset.dataset.set_total_size()
collected_bytes += dataset_assoc.dataset.dataset.get_total_size()

if job.user:
job.user.adjust_total_disk_usage(collected_bytes)
@@ -1441,7 +1447,7 @@ def check_tool_output( self, stdout, stderr, tool_exit_code, job ):

def cleanup( self, delete_files=True ):
# At least one of these tool cleanup actions (job import), is needed
# for the tool to work properly, that is why one might want to run
# for thetool to work properly, that is why one might want to run
# cleanup but not delete files.
try:
if delete_files:
@@ -1451,15 +1457,7 @@ def cleanup( self, delete_files=True ):
galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session )
galaxy.tools.imp_exp.JobImportHistoryArchiveWrapper( self.app, self.job_id ).cleanup_after_job()
if delete_files:
job = self.get_job()
# TEMP BLOCK --- START
pluggedMedia = None
for pM in job.user.pluggedMedia:
pluggedMedia = pM
break
# TEMP BLOCK --- END
self.app.object_store.delete(job, user=job.user, pluggedMedia=pluggedMedia,
base_dir='job_work', entire_dir=True, dir_only=True, obj_dir=True)
self.app.object_store.delete(self.get_job(), base_dir='job_work', entire_dir=True, dir_only=True, obj_dir=True)
except:
log.exception( "Unable to cleanup job %d", self.job_id )

@@ -1569,7 +1567,6 @@ def get_input_paths( self, job=None ):

def get_output_fnames( self ):
if self.output_paths is None:
job = self.get_job()
self.compute_outputs()
return self.output_paths

@@ -1595,14 +1592,14 @@ def compute_outputs( self ):
for da in job.output_datasets + job.output_library_datasets:
da_false_path = dataset_path_rewriter.rewrite_dataset_path( da.dataset, 'output' )
mutable = da.dataset.dataset.external_filename is None
dataset_path = DatasetPath( da.dataset.dataset.id, da.dataset.get_file_name(), false_path=da_false_path, mutable=mutable )
dataset_path = DatasetPath( da.dataset.dataset.id, da.dataset.file_name, false_path=da_false_path, mutable=mutable )
results.append( ( da.name, da.dataset, dataset_path ) )

self.output_paths = [t[2] for t in results]
self.output_hdas_and_paths = dict([(t[0], t[1:]) for t in results])
if special:
false_path = dataset_path_rewriter.rewrite_dataset_path( special.dataset, 'output' )
dsp = DatasetPath( special.dataset.id, special.dataset.get_file_name( job.user ), false_path )
dsp = DatasetPath( special.dataset.id, special.dataset.file_name, false_path )
self.output_paths.append( dsp )
return self.output_paths
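
A pattern worth noting in the finish() hunks above is the output-collection retry loop: before reading a job's output, Galaxy repeatedly stats and chowns the file, up to app.config.retry_job_output_collection times, to defeat NFS attribute caching. A condensed sketch of that loop follows; the retry counter and two-second sleep come from the diff, while the standalone helper and its name are illustrative, and the Galaxy-specific ObjectNotFound exception is omitted for simplicity:

    import logging
    import os
    import time

    log = logging.getLogger(__name__)

    def wait_for_output_file(path, retries):
        # Illustrative helper, not code from the branch: retry stat/chown to
        # force the NFS client to refresh cached attributes before giving up.
        trynum = 0
        while trynum < retries:
            try:
                os.stat(path)
                os.chown(path, os.getuid(), -1)
                return True
            except OSError as e:
                trynum += 1
                log.warning('Error accessing %s, will retry: %s', path, e)
                time.sleep(2)
        return False
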

You are viewing a condensed version of this merge commit.