
Commit

Merge pull request #310 from LabKey/fb_merge_24.7_to_develop
Merge discvr-24.7 to develop
bbimber authored Aug 28, 2024
2 parents 1a03f83 + fad7e62 commit a3283a9
Showing 26 changed files with 605 additions and 305 deletions.
@@ -203,6 +203,8 @@ Ext4.define('SequenceAnalysis.panel.AnalysisSectionPanel', {
title: 'Add Steps',
border: false,
width: 800,
+autoScroll: true,
+maxHeight: '90%',
items: items,
buttons: [{
text: 'Done',
@@ -142,7 +142,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
containerPath: this.queryContainer,
schemaName: 'sequenceanalysis',
queryName: 'readdata',
-columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists',
+columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists,sra_accession',
metadata: {
queryContainerPath: {
createIfDoesNotExist: true,
@@ -160,11 +160,17 @@
load: function (store) {
var errors = [];
var errorNames = [];
+var archived = [];
store.each(function(rec){
if (rec.get('fileid1')){
if (!rec.get('fileid1/fileexists')){
-errors.push(rec);
-errorNames.push(rec.get('readset/name'));
+if (!rec.get('sra_accession')) {
+errors.push(rec);
+errorNames.push(rec.get('readset/name'));
+}
+else {
+archived.push(rec.get('readset/name'))
+}
}
else {
this.fileIds.push(rec.get('fileid1'));
@@ -178,8 +184,13 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

if (rec.get('fileid2')){
if (!rec.get('fileid2/fileexists')){
-errors.push(rec);
-errorNames.push(rec.get('name'))
+if (!rec.get('sra_accession')) {
+errors.push(rec);
+errorNames.push(rec.get('name'))
+}
+else {
+archived.push(rec.get('name'));
+}
}
else {
this.fileIds.push(rec.get('fileid2'));
@@ -188,7 +199,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
}
}, this);

-this.onStoreLoad(errorNames);
+this.onStoreLoad(errorNames, archived);

var target = this.down('#readsetCount');
if (target) {
@@ -201,13 +212,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

storesLoaded: 0,
errorNames: [],
+archivedNames: [],

-onStoreLoad: function(errorNames){
+onStoreLoad: function(errorNames, archivedNames){
this.storesLoaded++;
if (errorNames){
this.errorNames = this.errorNames.concat(errorNames);
this.errorNames = Ext4.unique(this.errorNames);
}

+if (archivedNames) {
+this.archivedNames = Ext4.unique(this.archivedNames.concat(archivedNames));
+}
if (this.storesLoaded === 2){
this.afterStoreLoad();
}
@@ -225,7 +241,10 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
dv.refresh();

if (this.errorNames.length){
-alert('The follow readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
+alert('The following readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
}
+else if (this.archivedNames.length) {
+Ext4.Msg.alert('Warning', 'One or more readsets contains SRA archived data. Please choose the option to auto-download these data');
+}
},

@@ -326,6 +345,14 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
uncheckedValue: false,
checked: false,
xtype: 'checkbox'
+},{
+fieldLabel: 'Restore SRA Data If Needed',
+helpPopup: 'If selected, any archived sequence data that contains an SRA accession will be re-downloaded to a temp location',
+name: 'doSraDownloadIfNeeded',
+inputValue: true,
+uncheckedValue: false,
+checked: true,
+xtype: 'checkbox'
}, this.getSaveTemplateCfg(),{
fieldLabel: 'Submit Jobs To Same Folder/Workbook As Readset?',
helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
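
Note on the new "Restore SRA Data If Needed" checkbox above: its helpPopup says archived data carrying an SRA accession is re-downloaded to a temp location, but the download code itself is not part of this diff. A minimal sketch of what that restore step could look like, assuming the sra-tools fasterq-dump binary is on the PATH (hypothetical helper; the module's real implementation may differ):

// Hypothetical sketch only -- not the module's actual downloader.
import java.io.File;
import java.io.IOException;

public class SraRestoreSketch
{
    public static File downloadToTemp(String sraAccession, File tempDir) throws IOException, InterruptedException
    {
        // fasterq-dump writes <accession>.fastq (or _1/_2.fastq for paired-end data) into the -O directory
        ProcessBuilder pb = new ProcessBuilder("fasterq-dump", "-O", tempDir.getPath(), sraAccession);
        pb.inheritIO();
        int exitCode = pb.start().waitFor();
        if (exitCode != 0)
        {
            throw new IOException("fasterq-dump exited with " + exitCode + " for accession: " + sraAccession);
        }

        return new File(tempDir, sraAccession + ".fastq");
    }
}
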
@@ -1,5 +1,6 @@
package org.labkey.sequenceanalysis.analysis;

+import com.google.common.io.Files;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
@@ -22,6 +23,7 @@
import org.labkey.api.sequenceanalysis.model.Readset;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
@@ -111,6 +113,10 @@ else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | Sequenc
{
getAndValidateHeaderForVcf(so, newRsName);
}
+else
+{
+throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath());
+}

ctx.getSequenceSupport().cacheObject("readsetId", newRsName);
}
@@ -207,6 +213,18 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName
String existingSample = header.getGenotypeSamples().get(0);

File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt");
+if (!sampleNamesFile.exists())
+{
+try
+{
+Files.touch(sampleNamesFile);
+}
+catch (IOException e)
+{
+throw new PipelineJobException(e);
+}
+}
+
try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND))
{
writer.println(newRsName);
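
For context: the sampleNames.txt file created above feeds a bcftools reheader call made through BcftoolsRunner elsewhere in this class (the invocation itself is not shown in this diff). bcftools reheader accepts a file of new sample names, one per line, via --samples, so the argument list is roughly the following sketch, reusing variables from the surrounding method; the binary path is resolved by BcftoolsRunner in the real code:

// Sketch of the expected invocation, equivalent to:
//   bcftools reheader --samples sampleNames.txt -o <output.vcf.gz> <input.vcf.gz>
List<String> args = new ArrayList<>();
args.add("bcftools");
args.add("reheader");
args.add("--samples");
args.add(sampleNamesFile.getPath());
args.add("-o");
args.add(outputVcf.getPath());
args.add(so.getFile().getPath());
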
@@ -225,11 +243,19 @@
try
{
File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false);
-FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
+if (so.getFile().exists())
+{
+so.getFile().delete();
+}
+FileUtils.moveFile(outputVcf, so.getFile());

FileType gz = new FileType(".gz");
File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX);
-FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING);
+if (inputIndex.exists())
+{
+inputIndex.delete();
+}
+FileUtils.moveFile(outputIdx, inputIndex);

addTracker(so, existingSample, newRsName);
}
@@ -243,6 +269,11 @@ private void addTracker(SequenceOutputFile so, String existingSample, String new
{
File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt");
boolean preExisting = tracker.exists();
+if (!preExisting)
+{
+Files.touch(tracker);
+}
+
try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND))
{
if (!preExisting)
@@ -279,20 +310,36 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
}

+ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id());
+if (rg == null)
+{
+throw new PipelineJobException("Unable to find genome: " + so.getLibrary_id());
+}
+
ctx.getFileManager().addIntermediateFile(headerBam);
ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));

File output = new File(ctx.getWorkingDirectory(), so.getFile().getName());
-new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam);
+new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam, rg);
if (!output.exists())
{
throw new PipelineJobException("Missing file: " + output.getPath());
}

File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false);

-FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
-FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING);
+if (so.getFile().exists())
+{
+so.getFile().delete();
+}
+FileUtils.moveFile(output, so.getFile());
+
+File inputIndex = SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile());
+if (inputIndex.exists())
+{
+inputIndex.delete();
+}
+FileUtils.moveFile(outputIdx, inputIndex);

addTracker(so, existingSample, newRsName);
}
@@ -315,7 +362,7 @@ protected String getToolName()
return "ReplaceSamHeader";
}

-public void execute(File input, File output, File headerBam) throws PipelineJobException
+public void execute(File input, File output, File headerBam, ReferenceGenome genome) throws PipelineJobException
{
List<String> params = new ArrayList<>(getBaseArgs());

@@ -328,6 +375,9 @@ public void execute(File input, File output, File headerBam) throws PipelineJobE
params.add("--HEADER");
params.add(headerBam.getPath());

params.add("-R");
params.add(genome.getWorkingFastaFile().getPath());

execute(params);
}
}
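
The new -R argument passes the working reference FASTA through to Picard's ReplaceSamHeader (Picard's common --REFERENCE_SEQUENCE/-R option), which matters when the inputs are CRAM, since CRAM cannot be decoded or rewritten without the reference. Assuming getBaseArgs() and the lines hidden in this hunk supply the java -jar prefix and the --INPUT/--OUTPUT pair, the assembled command is roughly:

java -jar picard.jar ReplaceSamHeader \
    --INPUT input.cram \
    --OUTPUT output.cram \
    --HEADER headerBam.bam \
    -R genome.fasta
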
@@ -5,6 +5,7 @@
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.pipeline.RecordedActionSet;
import org.labkey.api.pipeline.WorkDirectoryTask;
+import org.labkey.api.sequenceanalysis.model.ReadData;
import org.labkey.api.sequenceanalysis.pipeline.AbstractSequenceTaskFactory;
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
@@ -106,7 +107,15 @@ public RecordedActionSet run() throws PipelineJobException

if (getPipelineJob().getReadset().hasArchivedData())
{
throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
if (!getPipelineJob().shouldAllowArchivedReadsets())
{
throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
}

if (getPipelineJob().getReadset().getReadData().stream().filter(ReadData::isArchived).filter(rd -> rd.getSra_accession() == null).count() > 1)
{
throw new PipelineJobException("The input readset has archived readsets that lack SRA accessions");
}
}

getHelper().cacheExpDatasForParams();
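
In plain terms, the gate above now tolerates archived read data when the job opts in via shouldAllowArchivedReadsets(), provided every archived ReadData record carries an SRA accession from which a later step can restore the file. A condensed restatement of that predicate as a sketch, where readset stands for getPipelineJob().getReadset():

// Sketch: block the job only when some archived read data has no SRA accession to restore from.
boolean hasUnrestorableData = readset.getReadData().stream()
        .filter(ReadData::isArchived)
        .anyMatch(rd -> rd.getSra_accession() == null);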