
Commit

Merge pull request #310 from LabKey/fb_merge_24.7_to_develop
Merge discvr-24.7 to develop
bbimber authored Aug 28, 2024
2 parents 1a03f83 + fad7e62 commit a3283a9
Showing 26 changed files with 605 additions and 305 deletions.
@@ -203,6 +203,8 @@ Ext4.define('SequenceAnalysis.panel.AnalysisSectionPanel', {
title: 'Add Steps',
border: false,
width: 800,
+autoScroll: true,
+maxHeight: '90%',
items: items,
buttons: [{
text: 'Done',
@@ -142,7 +142,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
containerPath: this.queryContainer,
schemaName: 'sequenceanalysis',
queryName: 'readdata',
-columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists',
+columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists,sra_accession',
metadata: {
queryContainerPath: {
createIfDoesNotExist: true,
@@ -160,11 +160,17 @@
load: function (store) {
var errors = [];
var errorNames = [];
+var archived = [];
store.each(function(rec){
if (rec.get('fileid1')){
if (!rec.get('fileid1/fileexists')){
-errors.push(rec);
-errorNames.push(rec.get('readset/name'));
+if (!rec.get('sra_accession')) {
+errors.push(rec);
+errorNames.push(rec.get('readset/name'));
+}
+else {
+archived.push(rec.get('readset/name'))
+}
}
else {
this.fileIds.push(rec.get('fileid1'));
@@ -178,8 +184,13 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

if (rec.get('fileid2')){
if (!rec.get('fileid2/fileexists')){
-errors.push(rec);
-errorNames.push(rec.get('name'))
+if (!rec.get('sra_accession')) {
+errors.push(rec);
+errorNames.push(rec.get('name'))
+}
+else {
+archived.push(rec.get('name'));
+}
}
else {
this.fileIds.push(rec.get('fileid2'));
@@ -188,7 +199,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
}
}, this);

-this.onStoreLoad(errorNames);
+this.onStoreLoad(errorNames, archived);

var target = this.down('#readsetCount');
if (target) {
@@ -201,13 +212,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

storesLoaded: 0,
errorNames: [],
+archivedNames: [],

-onStoreLoad: function(errorNames){
+onStoreLoad: function(errorNames, archivedNames){
this.storesLoaded++;
if (errorNames){
this.errorNames = this.errorNames.concat(errorNames);
this.errorNames = Ext4.unique(this.errorNames);
}

+if (archivedNames) {
+this.archivedNames = Ext4.unique(this.archivedNames.concat(archivedNames));
+}
if (this.storesLoaded === 2){
this.afterStoreLoad();
}
@@ -225,7 +241,10 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
dv.refresh();

if (this.errorNames.length){
-alert('The follow readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
+alert('The following readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
}
+else if (this.archivedNames.length) {
+Ext4.Msg.alert('Warning', 'One or more readsets contains SRA archived data. Please choose the option to auto-download these data');
+}
},

@@ -326,6 +345,14 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
uncheckedValue: false,
checked: false,
xtype: 'checkbox'
+},{
+fieldLabel: 'Restore SRA Data If Needed',
+helpPopup: 'If selected, any archived sequence data that contains an SRA accession will be re-downloaded to a temp location',
+name: 'doSraDownloadIfNeeded',
+inputValue: true,
+uncheckedValue: false,
+checked: true,
+xtype: 'checkbox'
}, this.getSaveTemplateCfg(),{
fieldLabel: 'Submit Jobs To Same Folder/Workbook As Readset?',
helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
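
Note on the new "Restore SRA Data If Needed" checkbox above: its helpPopup says archived data carrying an SRA accession is re-downloaded to a temp location, but the download code itself is not part of this diff. A minimal sketch of what that restore step could look like, assuming the sra-tools fasterq-dump binary is on the PATH (hypothetical helper; the module's real implementation may differ):

// Hypothetical sketch only -- not the module's actual downloader.
import java.io.File;
import java.io.IOException;

public class SraRestoreSketch
{
    public static File downloadToTemp(String sraAccession, File tempDir) throws IOException, InterruptedException
    {
        // fasterq-dump writes <accession>.fastq (or _1/_2.fastq for paired-end data) into the -O directory
        ProcessBuilder pb = new ProcessBuilder("fasterq-dump", "-O", tempDir.getPath(), sraAccession);
        pb.inheritIO();
        int exitCode = pb.start().waitFor();
        if (exitCode != 0)
        {
            throw new IOException("fasterq-dump exited with " + exitCode + " for accession: " + sraAccession);
        }

        return new File(tempDir, sraAccession + ".fastq");
    }
}
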
@@ -1,5 +1,6 @@
package org.labkey.sequenceanalysis.analysis;

+import com.google.common.io.Files;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
@@ -22,6 +23,7 @@
import org.labkey.api.sequenceanalysis.model.Readset;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
@@ -111,6 +113,10 @@ else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | Sequenc
{
getAndValidateHeaderForVcf(so, newRsName);
}
+else
+{
+throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath());
+}

ctx.getSequenceSupport().cacheObject("readsetId", newRsName);
}
@@ -207,6 +213,18 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName
String existingSample = header.getGenotypeSamples().get(0);

File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt");
+if (!sampleNamesFile.exists())
+{
+try
+{
+Files.touch(sampleNamesFile);
+}
+catch (IOException e)
+{
+throw new PipelineJobException(e);
+}
+}
+
try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND))
{
writer.println(newRsName);
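
For context: the sampleNames.txt file created above feeds a bcftools reheader call made through BcftoolsRunner elsewhere in this class (the invocation itself is not shown in this diff). bcftools reheader accepts a file of new sample names, one per line, via --samples, so the argument list is roughly the following sketch, reusing variables from the surrounding method; the binary path is resolved by BcftoolsRunner in the real code:

// Sketch of the expected invocation, equivalent to:
//   bcftools reheader --samples sampleNames.txt -o <output.vcf.gz> <input.vcf.gz>
List<String> args = new ArrayList<>();
args.add("bcftools");
args.add("reheader");
args.add("--samples");
args.add(sampleNamesFile.getPath());
args.add("-o");
args.add(outputVcf.getPath());
args.add(so.getFile().getPath());
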
@@ -225,11 +243,19 @@
try
{
File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false);
-FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
+if (so.getFile().exists())
+{
+so.getFile().delete();
+}
+FileUtils.moveFile(outputVcf, so.getFile());

FileType gz = new FileType(".gz");
File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX);
-FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING);
+if (inputIndex.exists())
+{
+inputIndex.delete();
+}
+FileUtils.moveFile(outputIdx, inputIndex);

addTracker(so, existingSample, newRsName);
}
@@ -243,6 +269,11 @@ private void addTracker(SequenceOutputFile so, String existingSample, String new
{
File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt");
boolean preExisting = tracker.exists();
+if (!preExisting)
+{
+Files.touch(tracker);
+}
+
try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND))
{
if (!preExisting)
@@ -279,20 +310,36 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
}

+ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id());
+if (rg == null)
+{
+throw new PipelineJobException("Unable to find genome: " + so.getLibrary_id());
+}
+
ctx.getFileManager().addIntermediateFile(headerBam);
ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));

File output = new File(ctx.getWorkingDirectory(), so.getFile().getName());
-new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam);
+new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam, rg);
if (!output.exists())
{
throw new PipelineJobException("Missing file: " + output.getPath());
}

File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false);

-FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
-FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING);
+if (so.getFile().exists())
+{
+so.getFile().delete();
+}
+FileUtils.moveFile(output, so.getFile());
+
+File inputIndex = SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile());
+if (inputIndex.exists())
+{
+inputIndex.delete();
+}
+FileUtils.moveFile(outputIdx, inputIndex);

addTracker(so, existingSample, newRsName);
}
@@ -315,7 +362,7 @@ protected String getToolName()
return "ReplaceSamHeader";
}

-public void execute(File input, File output, File headerBam) throws PipelineJobException
+public void execute(File input, File output, File headerBam, ReferenceGenome genome) throws PipelineJobException
{
List<String> params = new ArrayList<>(getBaseArgs());

@@ -328,6 +375,9 @@ public void execute(File input, File output, File headerBam) throws PipelineJobE
params.add("--HEADER");
params.add(headerBam.getPath());

params.add("-R");
params.add(genome.getWorkingFastaFile().getPath());

execute(params);
}
}
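
The new -R argument passes the working reference FASTA through to Picard's ReplaceSamHeader (Picard's common --REFERENCE_SEQUENCE/-R option), which matters when the inputs are CRAM, since CRAM cannot be decoded or rewritten without the reference. Assuming getBaseArgs() and the lines hidden in this hunk supply the java -jar prefix and the --INPUT/--OUTPUT pair, the assembled command is roughly:

java -jar picard.jar ReplaceSamHeader \
    --INPUT input.cram \
    --OUTPUT output.cram \
    --HEADER headerBam.bam \
    -R genome.fasta
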
@@ -5,6 +5,7 @@
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.pipeline.RecordedActionSet;
import org.labkey.api.pipeline.WorkDirectoryTask;
+import org.labkey.api.sequenceanalysis.model.ReadData;
import org.labkey.api.sequenceanalysis.pipeline.AbstractSequenceTaskFactory;
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
@@ -106,7 +107,15 @@ public RecordedActionSet run() throws PipelineJobException

if (getPipelineJob().getReadset().hasArchivedData())
{
throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
if (!getPipelineJob().shouldAllowArchivedReadsets())
{
throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
}

if (getPipelineJob().getReadset().getReadData().stream().filter(ReadData::isArchived).filter(rd -> rd.getSra_accession() == null).count() > 1)
{
throw new PipelineJobException("The input readset has archived readsets that lack SRA accessions");
}
}

getHelper().cacheExpDatasForParams();
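
In plain terms, the gate above now tolerates archived read data when the job opts in via shouldAllowArchivedReadsets(), provided every archived ReadData record carries an SRA accession from which a later step can restore the file. A condensed restatement of that predicate as a sketch, where readset stands for getPipelineJob().getReadset():

// Sketch: block the job only when some archived read data has no SRA accession to restore from.
boolean hasUnrestorableData = readset.getReadData().stream()
        .filter(ReadData::isArchived)
        .anyMatch(rd -> rd.getSra_accession() == null);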