From 32706596d54aeba6cc13c05da0a13d1cb3caed8a Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 2 Aug 2024 06:34:31 -0700 Subject: [PATCH 01/41] Show cellranger version - also rename multi-chain TRA-DV segments --- .../run/CellRangerFeatureBarcodeHandler.java | 8 ++++-- .../run/CellRangerGexCountStep.java | 17 +++++++----- .../singlecell/run/CellRangerVDJWrapper.java | 26 ++++++++++++++++++- .../singlecell/run/CellRangerWrapper.java | 23 +++++++++++++--- 4 files changed, 61 insertions(+), 13 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java b/singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java index 22b583814..60a265113 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java @@ -309,7 +309,11 @@ private void processType(List> inputFastqs, AlignmentOutputImpl } FileUtils.moveFile(outputHtml, outputHtmlRename); - String description = ctx.getParams().optBoolean("useGEX", false) ? 
"HTO and GEX Counts" : null; + String description = "Version: " + wrapper.getVersionString(); + if (ctx.getParams().optBoolean("useGEX", false)) + { + description = description + "\n" + "HTO and GEX Counts"; + }; output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x " + rs.getApplication() + " Summary", "10x Run Summary", rs.getRowId(), null, null, description); File rawCounts = new File(outsdir, "raw_feature_bc_matrix/matrix.mtx.gz"); @@ -366,7 +370,7 @@ private File makeDummyIndex(JobContext ctx) throws PipelineJobException CellRangerWrapper wrapper = new CellRangerWrapper(ctx.getLogger()); List args = new ArrayList<>(); - args.add(wrapper.getExe(true).getPath()); + args.add(wrapper.getExe().getPath()); args.add("mkref"); args.add("--fasta=" + fasta.getPath()); args.add("--genes=" + gtf.getPath()); diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 96da22169..467d9db4b 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -124,8 +124,9 @@ public static List getCellRangerGexParams(@Nullable Lis ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--chemistry"), "chemistry", "Chemistry", "This is usually left blank, in which case cellranger will auto-detect. 
Example values are: SC3Pv1, SC3Pv2, SC3Pv3, SC5P-PE, SC5P-R2, or SC5P-R1", "textfield", new JSONObject(){{ }}, null), - ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--include-introns"), "includeIntrons", "Include Introns", "If checked, reads from introns will be included in the counts", "checkbox", new JSONObject(){{ - + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--include-introns"), "includeIntrons", "Include Introns", "If selected, reads from introns will be included in the counts", "ldk-simplecombo", new JSONObject(){{ + put("storeValues", "true;false"); + put("value", "false"); }}, null) ); @@ -147,10 +148,10 @@ public boolean supportsGzipFastqs() @Override public String getAlignmentDescription() { - return getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), true); + return getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), true, getWrapper().getVersionString()); } - protected static String getAlignDescription(PipelineStepProvider provider, PipelineContext ctx, int stepIdx, boolean addAligner) + protected static String getAlignDescription(PipelineStepProvider provider, PipelineContext ctx, int stepIdx, boolean addAligner, String cellrangerVersion) { Integer gtfId = provider.getParameterByName("gtfFile").extractValue(ctx.getJob(), provider, stepIdx, Integer.class); File gtfFile = ctx.getSequenceSupport().getCachedData(gtfId); @@ -174,7 +175,9 @@ protected static String getAlignDescription(PipelineStepProvider provider, Pipel lines.add("GTF: " + gtfFile.getName()); } - return lines.isEmpty() ? 
null : StringUtils.join(lines, '\n'); + lines.add("Version: " + cellrangerVersion); + + return StringUtils.join(lines, '\n'); } @Override @@ -271,7 +274,7 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) } List args = new ArrayList<>(); - args.add(getWrapper().getExe(true).getPath()); + args.add(getWrapper().getExe().getPath()); args.add("mkref"); args.add("--fasta=" + referenceGenome.getWorkingFastaFile().getPath()); args.add("--genes=" + gtfFile.getPath()); @@ -393,7 +396,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu outputHtmlRename.delete(); } FileUtils.moveFile(outputHtml, outputHtmlRename); - String description = getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), false); + String description = getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), false, getWrapper().getVersionString()); output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x Count Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), description); File loupe = new File(outdir, "cloupe.cloupe"); diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 633214b51..1feb9af6c 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -167,6 +167,13 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException { lineage = lineage.replace("DV", "/DV"); } + + // NOTE: cellranger expects TRDV segment to start with TRDV, so re-order + if ("TRD".equals(locus)) + { + String[] tokens = lineage.split("/"); + lineage = "TR" + tokens[1] + "/" + tokens[0]; + } } StringBuilder header = new StringBuilder(); @@ -421,7 +428,8 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp } FileUtils.moveFile(outputHtml, outputHtmlRename); - 
output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), null); + String versionString = "Version: " + getWrapper().getVersionString(); + output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); } catch (IOException e) { @@ -905,4 +913,20 @@ else if (uniqueChains.size() == 2) log.info("\tChimeric calls recovered: " + chimericCallsRecovered); } } + + public @Nullable String getVersionString() + { + try + { + String ret = executeWithOutput(Arrays.asList(getExe().getPath(), "--version")); + + return ret.replaceAll("^cellranger ", ""); + } + catch (PipelineJobException e) + { + getLogger().error("Unable to find cellRanger version"); + + return "Unknown"; + } + } } diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerWrapper.java index 74c964031..80ef3c95c 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerWrapper.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.nio.file.Files; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -32,9 +33,9 @@ public CellRangerWrapper(@Nullable Logger logger) super(logger); } - protected File getExe(boolean use31) + protected File getExe() { - return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger" + (use31 ? 
"-31" : "")); + return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger"); } public static Set getRawDataDirs(File outputDir, boolean filteredOnly, boolean includeAnalysis) @@ -75,7 +76,7 @@ public File getLocalFastqDir(File outputDirectory) public List prepareCountArgs(AlignmentOutputImpl output, String id, File outputDirectory, Readset rs, List> inputFastqPairs, List extraArgs, boolean writeFastqArgs) throws PipelineJobException { List args = new ArrayList<>(); - args.add(getExe(false).getPath()); + args.add(getExe().getPath()); args.add("count"); args.add("--id=" + id); @@ -259,4 +260,20 @@ public static String getId(String idParam, Readset rs) return id; } + + public @Nullable String getVersionString() + { + try + { + String ret = executeWithOutput(Arrays.asList(getExe().getPath(), "--version")); + + return ret.replaceAll("^cellranger ", ""); + } + catch (PipelineJobException e) + { + getLogger().error("Unable to find cellRanger version"); + + return "Unknown"; + } + } } From 9669c748591902a9654d88cd094dabc759c888ed Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 09:56:33 -0700 Subject: [PATCH 02/41] Improve handling of TRA-DV lineages --- .../singlecell/run/CellRangerVDJWrapper.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 1feb9af6c..d937bd71a 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -159,6 +159,7 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException // Special-case TRAVxx/DVxx lineages: if (lineage.startsWith("TRA") && lineage.contains("DV") && !lineage.contains("/DV")) { + getPipelineCtx().getLogger().debug("Updating -DV to /DV in: " + lineage); if (lineage.contains("-DV")) { lineage = 
lineage.replace("-DV", "/DV"); @@ -167,13 +168,15 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException { lineage = lineage.replace("DV", "/DV"); } + } - // NOTE: cellranger expects TRDV segment to start with TRDV, so re-order - if ("TRD".equals(locus)) - { - String[] tokens = lineage.split("/"); - lineage = "TR" + tokens[1] + "/" + tokens[0]; - } + // NOTE: cellranger expects TRDV segment to start with TRDV, so re-order + if ("TRD".equals(locus) && lineage.startsWith("TRA")) + { + getPipelineCtx().getLogger().debug("Editing TRA/DV lineage: " + lineage); + String[] tokens = lineage.split("/"); + lineage = "TR" + tokens[1] + "/" + tokens[0]; + getPipelineCtx().getLogger().debug("to: " + lineage); } StringBuilder header = new StringBuilder(); From 960c48b22f1affaa0018fd325bcb38c57e26926f Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 10:22:48 -0700 Subject: [PATCH 03/41] Move getLocationForCachedInputs to SequenceJob --- .../pipeline/SequenceJob.java | 32 +++++++++++++++++++ .../pipeline/VariantProcessingJob.java | 30 ----------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index b47535536..08a1b8dc3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -28,6 +28,7 @@ import org.labkey.api.pipeline.TaskFactory; import org.labkey.api.pipeline.TaskId; import org.labkey.api.pipeline.TaskPipeline; +import org.labkey.api.pipeline.WorkDirectory; import org.labkey.api.pipeline.file.AbstractFileAnalysisJob; import org.labkey.api.pipeline.file.FileAnalysisJobSupport; import org.labkey.api.reader.Readers; @@ -112,6 +113,7 @@ protected SequenceJob(SequenceJob parentJob, String jobName, String subdirectory protected boolean 
shouldAllowArchivedReadsets() { + // TODO: conditional about allowing re-download return false; } @@ -610,4 +612,34 @@ public void testSerializeSupport() throws Exception assertNull("Support not null after deserialize", deserializedJob._support); } } + + public File getLocationForCachedInputs(WorkDirectory wd, boolean createIfDoesntExist) + { + File ret; + + String localDir = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("LOCAL_DATA_CACHE_DIR"); + if (localDir == null) + { + localDir = StringUtils.trimToNull(System.getenv("LOCAL_DATA_CACHE_DIR")); + } + + if (localDir == null) + { + ret = new File(wd.getDir(), "cachedData"); + } + else + { + String guid = getParentGUID() == null ? getJobGUID() : getParentGUID(); + ret = new File(localDir, guid); + + getLogger().debug("Using local directory to cache data: " + ret.getPath()); + } + + if (createIfDoesntExist && !ret.exists()) + { + ret.mkdirs(); + } + + return ret; + } } \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingJob.java index 444adf5d7..101d81c51 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingJob.java @@ -413,34 +413,4 @@ public File getDataDirectory() assertEquals(intervalMap, intervalMap2); } } - - public File getLocationForCachedInputs(WorkDirectory wd, boolean createIfDoesntExist) - { - File ret; - - String localDir = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("LOCAL_DATA_CACHE_DIR"); - if (localDir == null) - { - localDir = StringUtils.trimToNull(System.getenv("LOCAL_DATA_CACHE_DIR")); - } - - if (localDir == null) - { - ret = new File(wd.getDir(), "cachedData"); - } - else - { - String guid = getParentGUID() == null ? 
getJobGUID() : getParentGUID(); - ret = new File(localDir, guid); - - getLogger().debug("Using local directory to cache data: " + ret.getPath()); - } - - if (createIfDoesntExist && !ret.exists()) - { - ret.mkdirs(); - } - - return ret; - } } From 326265a7149da04df352b8f0a3f0edc9fe373d07 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 10:48:14 -0700 Subject: [PATCH 04/41] Allow alignment jobs to re-download archived SRA data --- .../panel/SequenceAnalysisPanel.js | 8 +++ .../pipeline/AlignmentInitTask.java | 11 +++- .../pipeline/SequenceAlignmentTask.java | 64 +++++++++++++++++++ .../pipeline/SequenceJob.java | 3 +- 4 files changed, 83 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js index dfaa1e07d..ab059b6c5 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js @@ -326,6 +326,14 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { uncheckedValue: false, checked: false, xtype: 'checkbox' + },{ + fieldLabel: 'Restore SRA Data If Needed', + helpPopup: 'If selected, any archived sequence data that contains an SRA accession will be re-downloaded to a temp location', + name: 'doSraDownloadIfNeeded', + inputValue: true, + uncheckedValue: false, + checked: true, + xtype: 'checkbox' }, this.getSaveTemplateCfg(),{ fieldLabel: 'Submit Jobs To Same Folder/Workbook As Readset?', helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. 
Checking this box will enable this.', diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java index 203df59db..4636cc2e6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java @@ -5,6 +5,7 @@ import org.labkey.api.pipeline.RecordedAction; import org.labkey.api.pipeline.RecordedActionSet; import org.labkey.api.pipeline.WorkDirectoryTask; +import org.labkey.api.sequenceanalysis.model.ReadData; import org.labkey.api.sequenceanalysis.pipeline.AbstractSequenceTaskFactory; import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep; import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep; @@ -106,7 +107,15 @@ public RecordedActionSet run() throws PipelineJobException if (getPipelineJob().getReadset().hasArchivedData()) { - throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments"); + if (!getPipelineJob().shouldAllowArchivedReadsets()) + { + throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments"); + } + + if (getPipelineJob().getReadset().getReadData().stream().filter(ReadData::isArchived).filter(rd -> rd.getSra_accession() == null).count() > 1) + { + throw new PipelineJobException("The input readset has archived readsets that lack SRA accessions"); + } } getHelper().cacheExpDatasForParams(); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index bc6a3ea03..43cd85141 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -18,6 +18,7 @@ import 
com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.common.io.Files; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.ValidationStringency; import org.apache.commons.io.FileUtils; @@ -68,6 +69,7 @@ import org.labkey.api.util.Pair; import org.labkey.sequenceanalysis.ReadDataImpl; import org.labkey.sequenceanalysis.SequenceReadsetImpl; +import org.labkey.sequenceanalysis.run.RestoreSraDataHandler; import org.labkey.sequenceanalysis.run.bampostprocessing.SortSamStep; import org.labkey.sequenceanalysis.run.preprocessing.TrimmomaticWrapper; import org.labkey.sequenceanalysis.run.util.AddOrReplaceReadGroupsWrapper; @@ -238,6 +240,7 @@ public RecordedActionSet run() throws PipelineJobException try { SequenceReadsetImpl rs = getPipelineJob().getReadset(); + restoreArchivedReadDataIfNeeded(rs); copyReferenceResources(); //then we preprocess FASTQ files, if needed @@ -1858,5 +1861,66 @@ public void serializeTest() throws Exception file.delete(); } } + + private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobException + { + for (ReadData rd : rs.getReadData()) + { + if (! 
(rd instanceof ReadDataImpl rdi)) + { + throw new PipelineJobException("Expected readdata to be a ReadDataImpl"); + } + + if (rd.isArchived()) + { + getPipelineJob().getLogger().info("Restoring files for readdata: " + rd.getRowid() + " / " + rd.getSra_accession()); + if (!getPipelineJob().shouldAllowArchivedReadsets()) + { + throw new PipelineJobException("This job is not configured to allow archived readsets!"); + } + + if (rd.getSra_accession() == null) + { + throw new PipelineJobException("Missing SRA accession: " + rd.getRowid()); + } + + File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); + getTaskFileManagerImpl().addIntermediateFile(outDir); + + File doneFile = new File(outDir, rd.getSra_accession() + ".done"); + RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); + if (doneFile.exists()) + { + rdi.setFile(new File(outDir, rd.getSra_accession() + "_1.fastq"), 0); + if (rd.getFileId2() != null) + { + rdi.setFile(new File(outDir, rd.getSra_accession() + "_2.fastq"), 1); + } + } + else + { + if (!outDir.exists()) + { + outDir.mkdirs(); + } + + Pair downloaded = sra.downloadSra(rd.getSra_accession(), outDir); + rdi.setFile(downloaded.first, 0); + rdi.setFile(downloaded.second, 1); + + try + { + Files.touch(doneFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + rdi.setArchived(false); + } + } + } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index 08a1b8dc3..bbea729de 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -113,8 +113,7 @@ protected SequenceJob(SequenceJob parentJob, String jobName, String subdirectory protected boolean shouldAllowArchivedReadsets() { - // TODO: conditional about allowing 
re-download - return false; + return ("true".equals(_params.get("doSraDownloadIfNeeded"))); } public SequenceJob(String providerName, Container c, User u, @Nullable String jobName, PipeRoot pipeRoot, JSONObject params, TaskId taskPipelineId, String folderPrefix) throws IOException From 8700018585915e8b725d7c42036b47eb3d2d858a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 11:29:05 -0700 Subject: [PATCH 05/41] Bugfix to handling of TRAV/DV segments --- .../labkey/singlecell/run/CellRangerVDJWrapper.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index d937bd71a..ac1e0e6c2 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -175,7 +175,7 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException { getPipelineCtx().getLogger().debug("Editing TRA/DV lineage: " + lineage); String[] tokens = lineage.split("/"); - lineage = "TR" + tokens[1] + "/" + tokens[0]; + lineage = "TR" + tokens[1] + "/" + tokens[0].replaceAll("^TR", ""); getPipelineCtx().getLogger().debug("to: " + lineage); } @@ -824,6 +824,7 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp { String line; int chimericCallsRecovered = 0; + int restoredTRDVAV = 0; int lineIdx = 0; while ((line = reader.readLine()) != null) @@ -841,6 +842,15 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp //Infer correct chain from the V, J and C genes String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings + + // Restore original value for TRD/TRA + if (tokens[6].contains("TRDV") && tokens[6].contains("/") && tokens[6].contains("AV")) + { + restoredTRDVAV++; + String[] split = tokens[6].split("/"); + tokens[6] = "TR" + 
split[1] + "/" + split[0].replaceAll("TR", ""); + } + List chains = new ArrayList<>(); String vGeneChain = null; String jGeneChain = null; @@ -914,6 +924,7 @@ else if (uniqueChains.size() == 2) } log.info("\tChimeric calls recovered: " + chimericCallsRecovered); + log.info("\tTRDV/AV calls restored: " + restoredTRDVAV); } } From 2742759d62319b824c6984b3043b7ec40fd82ee5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 12:46:33 -0700 Subject: [PATCH 06/41] Bugfix when doSraDownloadIfNeeded not provided --- .../src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index bbea729de..f4c1c4826 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -113,7 +113,7 @@ protected SequenceJob(SequenceJob parentJob, String jobName, String subdirectory protected boolean shouldAllowArchivedReadsets() { - return ("true".equals(_params.get("doSraDownloadIfNeeded"))); + return ("true".equals(_params.optString("doSraDownloadIfNeeded"))); } public SequenceJob(String providerName, Container c, User u, @Nullable String jobName, PipeRoot pipeRoot, JSONObject params, TaskId taskPipelineId, String folderPrefix) throws IOException From a57d9a38cca3afa2f247ffeec20622ed4654442a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 13:04:24 -0700 Subject: [PATCH 07/41] Primers should not be required for CellRangerVDJWrapper --- .../src/org/labkey/singlecell/run/CellRangerVDJWrapper.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index ac1e0e6c2..c4580acee 100644 --- 
a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -359,7 +359,10 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp { writer.println("[vdj]"); writer.println("reference," + indexDir.getPath()); - writer.println("inner-enrichment-primers," + primerFile); + if (primers != null) + { + writer.println("inner-enrichment-primers," + primerFile); + } writer.println(""); writer.println("[libraries]"); writer.println("fastq_id,fastqs,lanes,feature_types,subsample_rate"); From d8e475267e03cf8efea2f108485385a802e261ea Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 14:54:14 -0700 Subject: [PATCH 08/41] Primers are required for CellRangerVDJWrapper --- .../singlecell/run/CellRangerVDJWrapper.java | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index c4580acee..f562f7284 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -81,6 +81,7 @@ public VDJProvider() ToolParameterDescriptor.create(INNER_ENRICHMENT_PRIMERS, "Inner Enrichment Primers", "An option comma-separated list of the inner primers used for TCR enrichment. 
These will be used for trimming.", "textarea", new JSONObject(){{ put("height", 100); put("width", 400); + put("allowBlank", false); }}, null) ), null, "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger", true, false, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN); } @@ -311,29 +312,31 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp File indexDir = AlignerIndexUtil.getIndexDir(referenceGenome, getIndexCachedDirName(getPipelineCtx().getJob())); String primers = StringUtils.trimToNull(getProvider().getParameterByName(INNER_ENRICHMENT_PRIMERS).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, null)); - File primerFile = new File(outputDirectory, "primers.txt"); - if (primers != null) + if (primers == null) { - primers = primers.replaceAll("\\s+", ","); - primers = primers.replaceAll(",+", ","); + throw new PipelineJobException("Enrichment primers are required"); + } - try (PrintWriter writer = PrintWriters.getPrintWriter(primerFile)) - { - Arrays.stream(primers.split(",")).forEach(x -> { - x = StringUtils.trimToNull(x); - if (x != null) - { - writer.println(x); - } - }); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } + File primerFile = new File(outputDirectory, "primers.txt"); + primers = primers.replaceAll("\\s+", ","); + primers = primers.replaceAll(",+", ","); - output.addIntermediateFile(primerFile); + try (PrintWriter writer = PrintWriters.getPrintWriter(primerFile)) + { + Arrays.stream(primers.split(",")).forEach(x -> { + x = StringUtils.trimToNull(x); + if (x != null) + { + writer.println(x); + } + }); } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + output.addIntermediateFile(primerFile); Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); if (maxThreads != null) @@ -359,10 +362,7 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List 
inp { writer.println("[vdj]"); writer.println("reference," + indexDir.getPath()); - if (primers != null) - { - writer.println("inner-enrichment-primers," + primerFile); - } + writer.println("inner-enrichment-primers," + primerFile); writer.println(""); writer.println("[libraries]"); writer.println("fastq_id,fastqs,lanes,feature_types,subsample_rate"); From 2fd324b8e8c23831490ba8a9d7563e27a3f52233 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 15:15:33 -0700 Subject: [PATCH 09/41] Allow client-side to accept SRA archived data --- .../panel/SequenceAnalysisPanel.js | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js index ab059b6c5..a36c4fe1f 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js @@ -142,7 +142,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { containerPath: this.queryContainer, schemaName: 'sequenceanalysis', queryName: 'readdata', - columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists', + columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists,sra_accession', metadata: { queryContainerPath: { createIfDoesNotExist: true, @@ -160,11 +160,17 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { load: function (store) { var errors = []; var errorNames = []; + var archived = []; store.each(function(rec){ if (rec.get('fileid1')){ if (!rec.get('fileid1/fileexists')){ - errors.push(rec); - errorNames.push(rec.get('readset/name')); + if (!rec.get('sra_accession')) { + errors.push(rec); + 
errorNames.push(rec.get('readset/name')); + } + else { + archived.push(rec.get('readset/name')) + } } else { this.fileIds.push(rec.get('fileid1')); @@ -178,8 +184,13 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { if (rec.get('fileid2')){ if (!rec.get('fileid2/fileexists')){ - errors.push(rec); - errorNames.push(rec.get('name')) + if (!rec.get('sra_accession')) { + errors.push(rec); + errorNames.push(rec.get('name')) + } + else { + archived.push(rec.get('name')); + } } else { this.fileIds.push(rec.get('fileid2')); @@ -188,7 +199,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { } }, this); - this.onStoreLoad(errorNames); + this.onStoreLoad(errorNames, archived); var target = this.down('#readsetCount'); if (target) { @@ -201,13 +212,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { storesLoaded: 0, errorNames: [], + archivedNames: [], - onStoreLoad: function(errorNames){ + onStoreLoad: function(errorNames, archivedNames){ this.storesLoaded++; if (errorNames){ this.errorNames = this.errorNames.concat(errorNames); this.errorNames = Ext4.unique(this.errorNames); } + + if (archivedNames) { + this.archivedNames = Ext4.unique(this.archivedNames.concat(archivedNames)); + } if (this.storesLoaded === 2){ this.afterStoreLoad(); } @@ -225,7 +241,10 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', { dv.refresh(); if (this.errorNames.length){ - alert('The follow readsets lack an input file and will be skipped: ' + this.errorNames.join(', ')); + alert('The following readsets lack an input file and will be skipped: ' + this.errorNames.join(', ')); + } + else if (this.archivedNames.length) { + Ext4.Msg.alert('Warning', 'One or more readsets contains SRA archived data. 
Please choose the option to auto-download these data'); } }, From 0df977e11077063532c9c7c6d8a99a581b61feb4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 4 Aug 2024 20:15:55 -0700 Subject: [PATCH 10/41] Set job status in SequenceAlignmentTask --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 43cd85141..e89f61f67 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -294,6 +294,8 @@ private void logEnvironment() throws PipelineJobException private Map> performFastqPreprocessing(SequenceReadsetImpl rs) throws IOException, PipelineJobException { + getJob().setStatus(PipelineJob.TaskStatus.running, "Fastq Preprocessing"); + if (_resumer.isFastqPreprocessingDone()) { getJob().getLogger().info("resuming FASTQ preprocessing from saved state"); @@ -1873,6 +1875,7 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobExcep if (rd.isArchived()) { + getJob().setStatus(PipelineJob.TaskStatus.running, "Downloading from SRA"); getPipelineJob().getLogger().info("Restoring files for readdata: " + rd.getRowid() + " / " + rd.getSra_accession()); if (!getPipelineJob().shouldAllowArchivedReadsets()) { From bcaf6bf2365a1ecc1c8072987485db82886f069e Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 5 Aug 2024 07:17:38 -0700 Subject: [PATCH 11/41] Improve cellranger description fields --- .../src/org/labkey/singlecell/run/CellRangerGexCountStep.java | 4 ++++ .../src/org/labkey/singlecell/run/CellRangerVDJWrapper.java | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java 
b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 467d9db4b..3da34733d 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -165,6 +165,10 @@ protected static String getAlignDescription(PipelineStepProvider provider, Pi } List lines = new ArrayList<>(); + + String includeIntrons = provider.getParameterByName("includeIntrons").extractValue(ctx.getJob(), provider, stepIdx, String.class, "false"); + lines.add("Include Introns: " + includeIntrons); + if (addAligner) { lines.add("Aligner: " + provider.getName()); diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index f562f7284..c2af4703c 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -513,7 +513,8 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome // NOTE: only tag the vloupe file for a/b: else if (isPrimaryDir) { - output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), null); + String versionString = "Version: " + getWrapper().getVersionString(); + output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); } return csv; From 67f77b7f86ef769473b807e7af75a45648828c2b Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 5 Aug 2024 10:58:19 -0700 Subject: [PATCH 12/41] Bugfix to cached ReadData --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index e89f61f67..7df65a5eb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1894,10 +1894,10 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobExcep RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); if (doneFile.exists()) { - rdi.setFile(new File(outDir, rd.getSra_accession() + "_1.fastq"), 0); + rdi.setFile(new File(outDir, rd.getSra_accession() + "_1.fastq"), 1); if (rd.getFileId2() != null) { - rdi.setFile(new File(outDir, rd.getSra_accession() + "_2.fastq"), 1); + rdi.setFile(new File(outDir, rd.getSra_accession() + "_2.fastq"), 2); } } else @@ -1908,8 +1908,8 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobExcep } Pair downloaded = sra.downloadSra(rd.getSra_accession(), outDir); - rdi.setFile(downloaded.first, 0); - rdi.setFile(downloaded.second, 1); + rdi.setFile(downloaded.first, 1); + rdi.setFile(downloaded.second, 2); try { From e285a64a289de448a609ce3d5267d82948ee9b32 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 7 Aug 2024 06:00:12 -0700 Subject: [PATCH 13/41] Ensure SRA data not deleted until end of job --- .../labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 7df65a5eb..3b194202b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1888,7 +1888,7 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws 
PipelineJobExcep } File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); - getTaskFileManagerImpl().addIntermediateFile(outDir); + getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); File doneFile = new File(outDir, rd.getSra_accession() + ".done"); RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); From 6545d9526778505ad84c98de5d78a3485980b3e7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 7 Aug 2024 06:10:20 -0700 Subject: [PATCH 14/41] Update AppendCiteSeq for newer versions --- .../singlecell/pipeline/singlecell/AppendCiteSeq.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java index 8f5a2286e..bf8f8f273 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java @@ -132,7 +132,14 @@ protected Map prepareCountData(SingleCellOutput output, SequenceO if (dropAggregateBarcodes) { + // NOTE: this is the location in CellRanger <= 6.x File aggregateCountFile = new File(existingCountMatrixUmiDir.getParentFile(), "antibody_analysis/aggregate_barcodes.csv"); + if (!aggregateCountFile.exists()) + { + // This is the location for >= 7.x + aggregateCountFile = new File(existingCountMatrixUmiDir.getParentFile(), "aggregate_barcodes.csv"); + } + if (!aggregateCountFile.exists()) { throw new PipelineJobException("Unable to find aggregate count file: " + aggregateCountFile.getPath()); From 68f6af8a110f64e796c8796a06557c4f97f43e29 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 7 Aug 2024 07:28:02 -0700 Subject: [PATCH 15/41] Support minAllowableDoubletRateFilter --- .../api/singlecell/CellHashingService.java | 5 +++- .../singlecell/CellHashingServiceImpl.java | 23 +++++++++++++++++-- 2 files changed, 25 
insertions(+), 3 deletions(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index c1721f054..ea4a88270 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -103,6 +103,7 @@ public static class CellHashingParameters public boolean skipNormalizationQc = false; public Integer minCountPerCell = 5; public Double majorityConsensusThreshold = null; + public Double minAllowableDoubletRateFilter = null; public Double callerDisagreementThreshold = null; public List methods = CALLING_METHOD.getDefaultConsensusMethods(); //Default to just executing the set used for default consensus calls, rather than additional ones public List consensusMethods = null; @@ -110,7 +111,7 @@ public static class CellHashingParameters public Integer cells = 0; public boolean keepMarkdown = false; public File h5File = null; - public boolean doTSNE = true; + public boolean doTSNE = false; private CellHashingParameters() { @@ -124,6 +125,7 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu ret.skipNormalizationQc = step.getProvider().getParameterByName("skipNormalizationQc").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false); ret.minCountPerCell = step.getProvider().getParameterByName("minCountPerCell").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Integer.class, 3); ret.majorityConsensusThreshold = step.getProvider().getParameterByName("majorityConsensusThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); + ret.minAllowableDoubletRateFilter = step.getProvider().getParameterByName("minAllowableDoubletRateFilter").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.callerDisagreementThreshold = 
step.getProvider().getParameterByName("callerDisagreementThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, null); ret.retainRawCountFile = step.getProvider().getParameterByName("retainRawCountFile").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, true); @@ -166,6 +168,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse ret.skipNormalizationQc = params.optBoolean("skipNormalizationQc", false); ret.minCountPerCell = params.optInt("minCountPerCell", 3); ret.majorityConsensusThreshold = params.get("majorityConsensusThreshold") == null ? null : params.getDouble("majorityConsensusThreshold"); + ret.minAllowableDoubletRateFilter = params.get("minAllowableDoubletRateFilter") == null ? null : params.getDouble("minAllowableDoubletRateFilter"); ret.callerDisagreementThreshold = params.get("callerDisagreementThreshold") == null ? null : params.getDouble("callerDisagreementThreshold"); ret.doTSNE = params.get("doTSNE") == null || params.getBoolean("doTSNE"); ret.retainRawCountFile = params.optBoolean("retainRawCountFile", true); diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 898c5ccdc..29a63603a 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -964,8 +964,13 @@ public List getHashingCallingParams(boolean allowMethod put("maxValue", 1); put("decimalPrecision", 2); }}, 0.2), + ToolParameterDescriptor.create("minAllowableDoubletRateFilter", "Min Allowable Doublet Rate Filter", "This applies to filtering algorithms based on their doublet rate. 
Typically, any algorithm with a high doublet rate (based on the theoretical rate) is discarded. This sets a minimum to the threshold used for triaging an algorithm", "ldk-numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1); + put("decimalPrecision", 2); + }}, 0.2), ToolParameterDescriptor.create("skipNormalizationQc", "Skip Normalization QC", null, "checkbox", null, true), - ToolParameterDescriptor.create("doTSNE", "Do tSNE", "If true, tSNE will be performed as part of QC", "checkbox", null, true), + ToolParameterDescriptor.create("doTSNE", "Do tSNE", "If true, tSNE will be performed as part of QC", "checkbox", null, false), ToolParameterDescriptor.create("retainRawCountFile", "Retain Raw Counts File", null, "checkbox", null, false), ToolParameterDescriptor.create("failIfUnexpectedHtosFound", "Fail If Unexpected HTOs Found", "If checked and if there are any HTOs (testing all known HTOs) with counts above the HTOs expected in this experiment, then an error will be thrown", "checkbox", new JSONObject(){{ put("checked", true); @@ -1263,7 +1268,21 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St String doTSNE = parameters.doTSNE ? "TRUE" : "FALSE"; String h5String = h5 == null ? "" : ", rawFeatureMatrixH5 = '/work/" + h5.getName() + "'"; String consensusMethodString = consensusMethodNames.isEmpty() ? 
"" : ", methodsForConsensus = c('" + StringUtils.join(consensusMethodNames, "','") + "')"; - writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + ", molInfoFile = '/work/" + molInfo.getName() + "', reportFile = '/work/" + htmlFile.getName() + "', callFile = '/work/" + callsFile.getName() + "', metricsFile = '/work/" + metricsFile.getName() + "', rawCountsExport = '/work/" + countFile.getName() + "', cellbarcodeWhitelist = " + cellbarcodeWhitelist + ", barcodeWhitelist = " + allowableBarcodeParam + ", title = '" + parameters.getReportTitle() + "', skipNormalizationQc = " + skipNormalizationQcString + ", methods = c('" + StringUtils.join(methodNames, "','") + "')" + consensusMethodString + ", keepMarkdown = " + keepMarkdown + ", minCountPerCell = " + (parameters.minCountPerCell == null ? "NULL" : parameters.minCountPerCell) + ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? 
"NULL" : parameters.callerDisagreementThreshold) + ", doTSNE = " + doTSNE + ")"); + writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + ", molInfoFile = '/work/" + molInfo.getName() + + "', reportFile = '/work/" + htmlFile.getName() + + "', callFile = '/work/" + callsFile.getName() + + "', metricsFile = '/work/" + metricsFile.getName() + + "', rawCountsExport = '/work/" + countFile.getName() + + "', cellbarcodeWhitelist = " + cellbarcodeWhitelist + ", barcodeWhitelist = " + allowableBarcodeParam + + ", title = '" + parameters.getReportTitle() + + "', skipNormalizationQc = " + skipNormalizationQcString + + ", methods = c('" + StringUtils.join(methodNames, "','") + "')" + consensusMethodString + + ", keepMarkdown = " + keepMarkdown + + ", minCountPerCell = " + (parameters.minCountPerCell == null ? "NULL" : parameters.minCountPerCell) + + ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) + + (parameters.minAllowableDoubletRateFilter == null ? 
"" : "', minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) + + ", doTSNE = " + doTSNE + ")"); writer.println("print('Rmarkdown complete')"); } From 098bb39d30c37fe802c3521cba362bac572eb0dc Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 7 Aug 2024 08:38:02 -0700 Subject: [PATCH 16/41] Do more cleanup in CellRangerVDJWrapper --- .../singlecell/run/CellRangerVDJWrapper.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index c2af4703c..9dc4b4692 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -419,6 +419,9 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp getPipelineCtx().getLogger().warn("Unable to find folder: " + directory.getPath()); } + output.addIntermediateFile(new File(outdir, "vdj_reference")); + output.addIntermediateFile(new File(outdir, "multi")); + try { File outputHtml = new File(outdir, "per_sample_outs/" + id + "/web_summary.html"); @@ -517,6 +520,19 @@ else if (isPrimaryDir) output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); } + output.addIntermediateFile(new File(sampleDir, "airr_rearrangement.tsv")); + output.addIntermediateFile(new File(sampleDir, "all_contig_annotations.bed")); + output.addIntermediateFile(new File(sampleDir, "all_contig_annotations.json")); + output.addIntermediateFile(new File(sampleDir, "cell_barcodes.json")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.bam")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.bam.bai")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.fasta")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.fasta.fai")); + 
output.addIntermediateFile(new File(sampleDir, "consensus.bam")); + output.addIntermediateFile(new File(sampleDir, "consensus.bam.bai")); + output.addIntermediateFile(new File(sampleDir, "donor_regions.fa")); + output.addIntermediateFile(new File(sampleDir, "vdj_contig_info.pb")); + return csv; } From 69479e9e4394a9ed9200da8dcb7fea05255ac3d2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 8 Aug 2024 05:40:04 -0700 Subject: [PATCH 17/41] Bugfix SRA download --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 4 ++-- .../labkey/sequenceanalysis/run/RestoreSraDataHandler.java | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 3b194202b..a28e3d931 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1894,10 +1894,10 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobExcep RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); if (doneFile.exists()) { - rdi.setFile(new File(outDir, rd.getSra_accession() + "_1.fastq"), 1); + rdi.setFile(new File(outDir, rd.getSra_accession() + "_1.fastq.gz"), 1); if (rd.getFileId2() != null) { - rdi.setFile(new File(outDir, rd.getSra_accession() + "_2.fastq"), 2); + rdi.setFile(new File(outDir, rd.getSra_accession() + "_2.fastq.gz"), 2); } } else diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java index 8f6a28ca8..4fd234370 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java @@ -512,8 +512,11 @@ public Pair downloadSra(String dataset, File outDir) throws Pipeline { getLogger().info("Deleting extra files: "); files.forEach(f -> { - getLogger().info(f.getName()); - f.delete(); + if (f.exists()) + { + getLogger().info(f.getName()); + f.delete(); + } }); } From ab24cc3a0ed3b41b085b8e854b9e89c7b6614fad Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 8 Aug 2024 05:53:07 -0700 Subject: [PATCH 18/41] Bugfix to cell hashing command --- .../singlecell/CellHashingServiceImpl.java | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 29a63603a..4e2a03321 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1268,20 +1268,23 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St String doTSNE = parameters.doTSNE ? "TRUE" : "FALSE"; String h5String = h5 == null ? "" : ", rawFeatureMatrixH5 = '/work/" + h5.getName() + "'"; String consensusMethodString = consensusMethodNames.isEmpty() ? 
"" : ", methodsForConsensus = c('" + StringUtils.join(consensusMethodNames, "','") + "')"; - writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + ", molInfoFile = '/work/" + molInfo.getName() + - "', reportFile = '/work/" + htmlFile.getName() + - "', callFile = '/work/" + callsFile.getName() + - "', metricsFile = '/work/" + metricsFile.getName() + - "', rawCountsExport = '/work/" + countFile.getName() + - "', cellbarcodeWhitelist = " + cellbarcodeWhitelist + ", barcodeWhitelist = " + allowableBarcodeParam + - ", title = '" + parameters.getReportTitle() + - "', skipNormalizationQc = " + skipNormalizationQcString + - ", methods = c('" + StringUtils.join(methodNames, "','") + "')" + consensusMethodString + + writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + + ", molInfoFile = '/work/" + molInfo.getName() + "'" + + ", reportFile = '/work/" + htmlFile.getName() + "'" + + ", callFile = '/work/" + callsFile.getName() + "'" + + ", metricsFile = '/work/" + metricsFile.getName() + "'" + + ", rawCountsExport = '/work/" + countFile.getName() + "'" + + ", cellbarcodeWhitelist = " + cellbarcodeWhitelist + + ", barcodeWhitelist = " + allowableBarcodeParam + + ", title = '" + parameters.getReportTitle() + "'" + + ", skipNormalizationQc = " + skipNormalizationQcString + + ", methods = c('" + StringUtils.join(methodNames, "','") + "')" + + consensusMethodString + ", keepMarkdown = " + keepMarkdown + ", minCountPerCell = " + (parameters.minCountPerCell == null ? "NULL" : parameters.minCountPerCell) + ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) + - (parameters.minAllowableDoubletRateFilter == null ? 
"" : "', minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) + + (parameters.minAllowableDoubletRateFilter == null ? "" : ", minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) + ", doTSNE = " + doTSNE + ")"); writer.println("print('Rmarkdown complete')"); From e90c1430251f594be85b72cee783cf2f9d56251f Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 9 Aug 2024 12:40:23 -0700 Subject: [PATCH 19/41] Push RIRA logic into R package --- singlecell/resources/chunks/RunRiraClassification.R | 1 - 1 file changed, 1 deletion(-) diff --git a/singlecell/resources/chunks/RunRiraClassification.R b/singlecell/resources/chunks/RunRiraClassification.R index 986208e64..1e7d5932a 100644 --- a/singlecell/resources/chunks/RunRiraClassification.R +++ b/singlecell/resources/chunks/RunRiraClassification.R @@ -4,7 +4,6 @@ for (datasetId in names(seuratObjects)) { seuratObj <- RIRA::Classify_ImmuneCells(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) seuratObj <- RIRA::Classify_TNK(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) - seuratObj$RIRA_TNK_v2.predicted_labels[seuratObj$RIRA_Immune_v2.majority_voting != 'T_NK'] <- 'Other' seuratObj <- RIRA::Classify_Myeloid(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) From 18013c8ab71e4e8b8c520e096d9dd032cec01be9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 10 Aug 2024 04:32:54 -0700 Subject: [PATCH 20/41] Bugfix to MergeBamAlignment --- .../sequenceanalysis/run/util/MergeBamAlignmentWrapper.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java index c254047b1..171bf5d07 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java @@ -94,7 +94,7 @@ else if (header.getReadGroups().size() > 1) FastqToSamWrapper fq = new FastqToSamWrapper(getLogger()); fq.setOutputDir(alignedBam.getParentFile()); fq.setStringency(ValidationStringency.SILENT); - File unmappedReadsBam = fq.execute(pair.getKey(), pair.getValue(), SAMFileHeader.SortOrder.queryname, rg); + File unmappedReadsBam = fq.execute(pair.getKey(), pair.getValue(), SAMFileHeader.SortOrder.unsorted, rg); if (!unmappedReadsBam.exists()) { throw new PipelineJobException("BAM file not created, expected: " + unmappedReadsBam.getPath()); @@ -121,8 +121,6 @@ else if (header.getReadGroups().size() > 1) params.add("--UNMAPPED_BAM"); params.add(finalUnmappedReadsBam.getPath()); - //TODO: bisulfiteSequence - params.add("--REFERENCE_SEQUENCE"); params.add(refFasta.getPath()); From 1280c0ab3298734091c9d009af099312f4466908 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 13 Aug 2024 07:13:13 -0700 Subject: [PATCH 21/41] More verbose logging in CellRangerVDJWrapper --- .../singlecell/run/CellRangerVDJWrapper.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 9dc4b4692..4b0e00f13 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -53,6 +53,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -843,7 +844,7 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp try (BufferedReader reader = Readers.getReader(inputCsv)) { String line; - int 
chimericCallsRecovered = 0; + Map chimericCallsRecovered = new HashMap<>(); int restoredTRDVAV = 0; int lineIdx = 0; @@ -904,24 +905,28 @@ else if (idx == 9) // Recover TRDV/TRAJ/TRAC: if (uniqueChains.size() > 1) { + // Defer to the constant region, if known: if (cGeneChain != null) { uniqueChains.clear(); uniqueChains.add(cGeneChain); - chimericCallsRecovered++; + String key = originalChain + "->" + cGeneChain + " (based on C-GENE)"; + chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); } else if (uniqueChains.size() == 2) { if ("TRD".equals(vGeneChain) && "TRA".equals(jGeneChain)) { uniqueChains.clear(); - chimericCallsRecovered++; + String key = originalChain + "->" + vGeneChain + " (based on V-GENE)"; + chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); uniqueChains.add(vGeneChain); } if ("TRA".equals(vGeneChain) && "TRD".equals(jGeneChain)) { uniqueChains.clear(); - chimericCallsRecovered++; + String key = originalChain + "->" + vGeneChain + " (based on V-GENE)"; + chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); uniqueChains.add(vGeneChain); } } @@ -934,7 +939,7 @@ else if (uniqueChains.size() == 2) } else { - log.error("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]); + log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". 
" + tokens[6] + "/" + tokens[8] + "/" + tokens[9]); } if (acceptableChains.contains(tokens[5])) @@ -943,7 +948,15 @@ else if (uniqueChains.size() == 2) } } - log.info("\tChimeric calls recovered: " + chimericCallsRecovered); + if (!chimericCallsRecovered.isEmpty()) + { + log.info("\tChimeric calls recovered: "); + for (String key : chimericCallsRecovered.keySet()) + { + log.info("\t" + key + ": " + chimericCallsRecovered.get(key)); + } + } + log.info("\tTRDV/AV calls restored: " + restoredTRDVAV); } } From ffd2f1dc1478f60809806fc2a91f84beeea68888 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 14 Aug 2024 03:03:01 -0700 Subject: [PATCH 22/41] Skip duplicated SRA accessions when re-downloading --- .../pipeline/SequenceAlignmentTask.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index a28e3d931..90b94fbec 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1866,6 +1866,7 @@ public void serializeTest() throws Exception private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobException { + Set sraIDs = new HashSet<>(); for (ReadData rd : rs.getReadData()) { if (! 
(rd instanceof ReadDataImpl rdi)) @@ -1886,11 +1887,29 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobExcep { throw new PipelineJobException("Missing SRA accession: " + rd.getRowid()); } + else if (sraIDs.contains(rd.getSra_accession())) + { + getJob().getLogger().debug("Already encountered accession, skipping: " + rd.getSra_accession()); + if (rs instanceof SequenceReadsetImpl rsi) + { + // Remove the duplicate + List rdl = new ArrayList<>(rsi.getReadDataImpl()); + rdl.remove(rd); + rsi.setReadData(rdl); + } + else + { + throw new PipelineJobException("Expected readset to be SequenceReadsetImpl"); + } + + continue; + } File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); File doneFile = new File(outDir, rd.getSra_accession() + ".done"); + sraIDs.add(rd.getSra_accession()); RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); if (doneFile.exists()) { From 82c7405435c1cbb3ab13b983146ec09c8a21328a Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 16 Aug 2024 03:44:03 -0700 Subject: [PATCH 23/41] Update defaults for CellRangerGexCountStep --- .../src/org/labkey/singlecell/run/CellRangerGexCountStep.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 3da34733d..56d4fbb54 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -359,7 +359,8 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu File indexDir = AlignerIndexUtil.getIndexDir(referenceGenome, getIndexCachedDirName(getPipelineCtx().getJob())); args.add("--transcriptome=" + indexDir.getPath()); - args.add("--create-bam=true"); + boolean discardBam = 
getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + args.add("--create-bam=" + !discardBam); getWrapper().setWorkingDir(outputDirectory); From 075a2ee805e3d71afdf94ec2146efacfed65ead7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 16 Aug 2024 11:38:02 -0700 Subject: [PATCH 24/41] Allow seurat pipeline to be kicked off against 10x Run Summaries --- .../labkey/singlecell/analysis/ProcessSingleCellHandler.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java index 0ef890877..5aefab2c8 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java @@ -23,7 +23,10 @@ public String getName() @Override public boolean canProcess(SequenceOutputFile f) { - return f.getFile() != null && LOUPE_TYPE.isType(f.getFile()); + return f.getFile() != null && ( + LOUPE_TYPE.isType(f.getFile()) || + ("10x Run Summary".equals(f.getCategory()) && f.getName().contains("10x Count Summary")) + ); } @Override From 0b52dce8192083092ea23b23b1dec582cec1be1c Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 Aug 2024 19:38:13 -0700 Subject: [PATCH 25/41] Bugfix to UpdateReadsetFilesHandler --- .../analysis/UpdateReadsetFilesHandler.java | 22 +++++++++++++++++++ .../pipeline/singlecell/AppendSaturation.java | 5 +++++ 2 files changed, 27 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java index 70f711efa..1a7484a47 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -1,5 +1,6 @@ package org.labkey.sequenceanalysis.analysis; +import com.google.common.io.Files; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; @@ -111,6 +112,10 @@ else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | Sequenc { getAndValidateHeaderForVcf(so, newRsName); } + else + { + throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath()); + } ctx.getSequenceSupport().cacheObject("readsetId", newRsName); } @@ -207,6 +212,18 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName String existingSample = header.getGenotypeSamples().get(0); File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt"); + if (!sampleNamesFile.exists()) + { + try + { + Files.touch(sampleNamesFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND)) { writer.println(newRsName); @@ -243,6 +260,11 @@ private void addTracker(SequenceOutputFile so, String existingSample, String new { File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt"); boolean preExisting = tracker.exists(); + if (!preExisting) + { + Files.touch(tracker); + } + try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND)) { if (!preExisting) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java index 322cd00dd..0314d69d1 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java @@ -65,6 +65,11 @@ public void init(SequenceOutputHandler.JobContext ctx, List { if 
(!LOUPE_TYPE.isType(so.getFile())) { + if (!so.getFile().getName().endsWith("seurat.rds")) + { + throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath()); + } + File meta = new File(so.getFile().getPath().replaceAll(".seurat.rds", ".cellBarcodes.csv")); if (!meta.exists()) { From 324cf7e9380351e7a9a229f9a0825329bb40333e Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 19 Aug 2024 19:47:32 -0700 Subject: [PATCH 26/41] Bugfix to AppendCiteSeq --- singlecell/resources/chunks/AppendCiteSeq.R | 3 ++- .../pipeline/singlecell/AppendCiteSeq.java | 27 ++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/singlecell/resources/chunks/AppendCiteSeq.R b/singlecell/resources/chunks/AppendCiteSeq.R index b070f3fb0..b22411466 100644 --- a/singlecell/resources/chunks/AppendCiteSeq.R +++ b/singlecell/resources/chunks/AppendCiteSeq.R @@ -20,7 +20,8 @@ for (datasetId in names(seuratObjects)) { if (dropAggregateBarcodes) { aggregateBarcodeFile <- paste0(matrixDir, ".aggregateCounts.csv") if (!file.exists(aggregateBarcodeFile)) { - stop(paste0('Unable to find file: ', aggregateBarcodeFile)) + print(paste0('Unable to find file, skipping: ', aggregateBarcodeFile)) + aggregateBarcodeFile <- NULL } } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java index bf8f8f273..d9d58fa37 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java @@ -142,23 +142,26 @@ protected Map prepareCountData(SingleCellOutput output, SequenceO if (!aggregateCountFile.exists()) { - throw new PipelineJobException("Unable to find aggregate count file: " + aggregateCountFile.getPath()); + ctx.getLogger().info("Unable to find aggregate count file, assuming there are none: " + aggregateCountFile.getPath()); } - localAggregateCountFile = new 
File(ctx.getOutputDir(), localCopyUmiCountDir.getName() + ".aggregateCounts.csv"); - try + else { - if (localAggregateCountFile.exists()) + localAggregateCountFile = new File(ctx.getOutputDir(), localCopyUmiCountDir.getName() + ".aggregateCounts.csv"); + try { - localAggregateCountFile.delete(); - } + if (localAggregateCountFile.exists()) + { + localAggregateCountFile.delete(); + } - FileUtils.copyFile(aggregateCountFile, localAggregateCountFile); - } - catch (IOException e) - { - throw new PipelineJobException(e); + FileUtils.copyFile(aggregateCountFile, localAggregateCountFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + ctx.getFileManager().addIntermediateFile(localAggregateCountFile); } - ctx.getFileManager().addIntermediateFile(localAggregateCountFile); } File validAdt = CellHashingServiceImpl.get().getValidCiteSeqBarcodeMetadataFile(ctx.getSourceDirectory(), parentReadset.getReadsetId()); From b29dfe724fab3257c2d3c35f5223f25310d33703 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 20 Aug 2024 06:59:36 -0700 Subject: [PATCH 27/41] Bugfix to UpdateReadsetFilesHandler --- .../analysis/UpdateReadsetFilesHandler.java | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java index 1a7484a47..9616213a2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -23,6 +23,7 @@ import org.labkey.api.sequenceanalysis.model.Readset; import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import 
org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; @@ -242,11 +243,19 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName try { File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false); - FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING); + if (so.getFile().exists()) + { + so.getFile().delete(); + } + FileUtils.moveFile(outputVcf, so.getFile()); FileType gz = new FileType(".gz"); File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX); - FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING); + if (inputIndex.exists()) + { + inputIndex.delete(); + } + FileUtils.moveFile(outputIdx, inputIndex); addTracker(so, existingSample, newRsName); } @@ -301,11 +310,17 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new throw new PipelineJobException("Expected header was not created: " + headerBam.getPath()); } + ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); + if (rg == null) + { + throw new PipelineJobException("Unable to find genome: " + so.getLibrary_id()); + } + ctx.getFileManager().addIntermediateFile(headerBam); ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam)); File output = new File(ctx.getWorkingDirectory(), so.getFile().getName()); - new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam); + new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam, rg); if (!output.exists()) { throw new PipelineJobException("Missing file: " + output.getPath()); @@ -313,8 +328,18 @@ private void reheaderBamOrCram(SequenceOutputFile 
so, JobContext ctx, String new File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false); - FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING); - FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING); + if (so.getFile().exists()) + { + so.getFile().delete(); + } + FileUtils.moveFile(output, so.getFile()); + + File inputIndex = SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()); + if (inputIndex.exists()) + { + inputIndex.delete(); + } + FileUtils.moveFile(outputIdx, inputIndex); addTracker(so, existingSample, newRsName); } @@ -337,7 +362,7 @@ protected String getToolName() return "ReplaceSamHeader"; } - public void execute(File input, File output, File headerBam) throws PipelineJobException + public void execute(File input, File output, File headerBam, ReferenceGenome genome) throws PipelineJobException { List params = new ArrayList<>(getBaseArgs()); @@ -350,6 +375,9 @@ public void execute(File input, File output, File headerBam) throws PipelineJobE params.add("--HEADER"); params.add(headerBam.getPath()); + params.add("-R"); + params.add(genome.getWorkingFastaFile().getPath()); + execute(params); } } From 82797da5a4159829c42eea80048cb48317fefcad Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 24 Aug 2024 05:27:32 -0700 Subject: [PATCH 28/41] Bugfix to CellRangerVDJWrapper --- .../labkey/api/singlecell/CellHashingService.java | 4 ++-- .../singlecell/run/CellRangerVDJWrapper.java | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index ea4a88270..8642df2c8 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -127,7 +127,7 @@ 
public static CellHashingService.CellHashingParameters createFromStep(SequenceOu ret.majorityConsensusThreshold = step.getProvider().getParameterByName("majorityConsensusThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.minAllowableDoubletRateFilter = step.getProvider().getParameterByName("minAllowableDoubletRateFilter").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.callerDisagreementThreshold = step.getProvider().getParameterByName("callerDisagreementThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); - ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, null); + ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false); ret.retainRawCountFile = step.getProvider().getParameterByName("retainRawCountFile").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, true); ret.failIfUnexpectedHtosFound = step.getProvider().getParameterByName("failIfUnexpectedHtosFound").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, true); ret.htoReadset = htoReadset; @@ -170,7 +170,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse ret.majorityConsensusThreshold = params.get("majorityConsensusThreshold") == null ? null : params.getDouble("majorityConsensusThreshold"); ret.minAllowableDoubletRateFilter = params.get("minAllowableDoubletRateFilter") == null ? null : params.getDouble("minAllowableDoubletRateFilter"); ret.callerDisagreementThreshold = params.get("callerDisagreementThreshold") == null ? 
null : params.getDouble("callerDisagreementThreshold"); - ret.doTSNE = params.get("doTSNE") == null || params.getBoolean("doTSNE"); + ret.doTSNE = params.get("doTSNE") == null || params.optBoolean("doTSNE", false); ret.retainRawCountFile = params.optBoolean("retainRawCountFile", true); ret.failIfUnexpectedHtosFound = params.optBoolean("failIfUnexpectedHtosFound", true); ret.htoReadset = htoReadset; diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 4b0e00f13..52e8d8d32 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -473,7 +473,13 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome { try { - FileUtils.moveFile(f, new File(sampleDir, f.getName())); + File dest = new File(sampleDir, f.getName()); + if (dest.exists()) + { + dest.delete(); + } + + FileUtils.moveFile(f, dest); } catch (IOException e) { @@ -911,7 +917,7 @@ else if (idx == 9) uniqueChains.clear(); uniqueChains.add(cGeneChain); String key = originalChain + "->" + cGeneChain + " (based on C-GENE)"; - chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0)); + chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); } else if (uniqueChains.size() == 2) { @@ -919,14 +925,14 @@ else if (uniqueChains.size() == 2) { uniqueChains.clear(); String key = originalChain + "->" + vGeneChain + " (based on V-GENE)"; - chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0)); + chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); uniqueChains.add(vGeneChain); } if ("TRA".equals(vGeneChain) && "TRD".equals(jGeneChain)) { uniqueChains.clear(); String key = originalChain + "->" + vGeneChain + " (based on V-GENE)"; - chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0)); + 
chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); uniqueChains.add(vGeneChain); } } From 7008b39a3e94df501698b2ec496a567134ed2e4e Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 24 Aug 2024 21:16:03 -0700 Subject: [PATCH 29/41] Bugfix to CellRangerVDJWrapper --- .../api-src/org/labkey/api/singlecell/CellHashingService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index 8642df2c8..97b12c45f 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -170,7 +170,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse ret.majorityConsensusThreshold = params.get("majorityConsensusThreshold") == null ? null : params.getDouble("majorityConsensusThreshold"); ret.minAllowableDoubletRateFilter = params.get("minAllowableDoubletRateFilter") == null ? null : params.getDouble("minAllowableDoubletRateFilter"); ret.callerDisagreementThreshold = params.get("callerDisagreementThreshold") == null ? 
null : params.getDouble("callerDisagreementThreshold"); - ret.doTSNE = params.get("doTSNE") == null || params.optBoolean("doTSNE", false); + ret.doTSNE = params.optBoolean("doTSNE", false); ret.retainRawCountFile = params.optBoolean("retainRawCountFile", true); ret.failIfUnexpectedHtosFound = params.optBoolean("failIfUnexpectedHtosFound", true); ret.htoReadset = htoReadset; From 4b3375e931baac8451a2351bc131064fb1426651 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 25 Aug 2024 14:33:41 -0700 Subject: [PATCH 30/41] Allow CellRanger to more easily skip BAM creation --- .../pipeline/SequenceAlignmentTask.java | 89 ++++++++++--------- .../run/CellRangerGexCountStep.java | 11 ++- 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 90b94fbec..fcbbaad5b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1383,57 +1383,66 @@ public File doAlignmentForSet(List> inputFiles, ReferenceGenome AlignmentStep.AlignmentOutput alignmentOutput = alignmentStep.performAlignment(rs, forwardFastqs, reverseFastqs, outputDirectory, referenceGenome, SequenceTaskHelper.getUnzippedBaseName(inputFiles.get(0).first.getName()) + "." 
+ alignmentStep.getProvider().getName().toLowerCase(), String.valueOf(lowestReadDataId), platformUnit); getHelper().getFileManager().addStepOutputs(alignmentAction, alignmentOutput); - if (alignmentOutput.getBAM() == null || !alignmentOutput.getBAM().exists()) - { - throw new PipelineJobException("Unable to find BAM file after alignment: " + alignmentOutput.getBAM()); - } Date end = new Date(); alignmentAction.setEndTime(end); getJob().getLogger().info(alignmentStep.getProvider().getLabel() + " Duration: " + DurationFormatUtils.formatDurationWords(end.getTime() - start.getTime(), true, true)); actions.add(alignmentAction); - SequenceUtil.logFastqBamDifferences(getJob().getLogger(), alignmentOutput.getBAM()); - - ToolParameterDescriptor mergeParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.SUPPORT_MERGED_UNALIGNED); - boolean doMergeUnaligned = alignmentStep.getProvider().supportsMergeUnaligned() && mergeParam != null && mergeParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); - if (doMergeUnaligned) + boolean discardBam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); + if (discardBam && alignmentOutput.getBAM() == null) { - getJob().setStatus(PipelineJob.TaskStatus.running, "MERGING UNALIGNED READS INTO BAM" + msgSuffix); - getJob().getLogger().info("merging unaligned reads into BAM"); - File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(alignmentOutput.getBAM()); - if (idx.exists()) - { - getJob().getLogger().debug("deleting index: " + idx.getPath()); - idx.delete(); - } - - //merge unaligned reads and clean file - MergeBamAlignmentWrapper wrapper = new MergeBamAlignmentWrapper(getJob().getLogger()); - wrapper.executeCommand(referenceGenome.getWorkingFastaFile(), alignmentOutput.getBAM(), inputFiles, null); - 
getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction); + getJob().getLogger().info("The alignment did not produce a BAM, but discardBam was selected. BAM stats steps will be skipped."); } else { - getJob().getLogger().info("skipping merge of unaligned reads on BAM"); - } + if (alignmentOutput.getBAM() == null || !alignmentOutput.getBAM().exists()) + { + throw new PipelineJobException("Unable to find BAM file after alignment: " + alignmentOutput.getBAM()); + } - if (alignmentStep.doAddReadGroups()) - { - getJob().setStatus(PipelineJob.TaskStatus.running, "ADDING READ GROUPS" + msgSuffix); - AddOrReplaceReadGroupsWrapper wrapper = new AddOrReplaceReadGroupsWrapper(getJob().getLogger()); - wrapper.executeCommand(alignmentOutput.getBAM(), null, rs.getReadsetId().toString(), rs.getPlatform(), (platformUnit == null ? rs.getReadsetId().toString() : platformUnit), rs.getName().replaceAll(" ", "_")); - getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction); - } - else - { - getJob().getLogger().info("skipping read group assignment"); - } + SequenceUtil.logFastqBamDifferences(getJob().getLogger(), alignmentOutput.getBAM()); - //generate stats - getJob().setStatus(PipelineJob.TaskStatus.running, "Generating BAM Stats"); - FlagStatRunner runner = new FlagStatRunner(getJob().getLogger()); - runner.execute(alignmentOutput.getBAM()); - getHelper().getFileManager().addCommandsToAction(runner.getCommandsExecuted(), alignmentAction); + ToolParameterDescriptor mergeParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.SUPPORT_MERGED_UNALIGNED); + boolean doMergeUnaligned = alignmentStep.getProvider().supportsMergeUnaligned() && mergeParam != null && mergeParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); + if (doMergeUnaligned) + { + getJob().setStatus(PipelineJob.TaskStatus.running, "MERGING UNALIGNED READS INTO 
BAM" + msgSuffix); + getJob().getLogger().info("merging unaligned reads into BAM"); + File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(alignmentOutput.getBAM()); + if (idx.exists()) + { + getJob().getLogger().debug("deleting index: " + idx.getPath()); + idx.delete(); + } + + //merge unaligned reads and clean file + MergeBamAlignmentWrapper wrapper = new MergeBamAlignmentWrapper(getJob().getLogger()); + wrapper.executeCommand(referenceGenome.getWorkingFastaFile(), alignmentOutput.getBAM(), inputFiles, null); + getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction); + } + else + { + getJob().getLogger().info("skipping merge of unaligned reads on BAM"); + } + + if (alignmentStep.doAddReadGroups()) + { + getJob().setStatus(PipelineJob.TaskStatus.running, "ADDING READ GROUPS" + msgSuffix); + AddOrReplaceReadGroupsWrapper wrapper = new AddOrReplaceReadGroupsWrapper(getJob().getLogger()); + wrapper.executeCommand(alignmentOutput.getBAM(), null, rs.getReadsetId().toString(), rs.getPlatform(), (platformUnit == null ? 
rs.getReadsetId().toString() : platformUnit), rs.getName().replaceAll(" ", "_")); + getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction); + } + else + { + getJob().getLogger().info("skipping read group assignment"); + } + + //generate stats + getJob().setStatus(PipelineJob.TaskStatus.running, "Generating BAM Stats"); + FlagStatRunner runner = new FlagStatRunner(getJob().getLogger()); + runner.execute(alignmentOutput.getBAM()); + getHelper().getFileManager().addCommandsToAction(runner.getCommandsExecuted(), alignmentAction); + } _resumer.setReadDataAlignmentDone(lowestReadDataId, actions, alignmentOutput.getBAM()); diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 56d4fbb54..eb397a075 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -377,12 +377,15 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu File outdir = new File(outputDirectory, id); outdir = new File(outdir, "outs"); - File bam = new File(outdir, "possorted_genome_bam.bam"); - if (!bam.exists()) + if (!discardBam) { - throw new PipelineJobException("Unable to find file: " + bam.getPath()); + File bam = new File(outdir, "possorted_genome_bam.bam"); + if (!bam.exists()) + { + throw new PipelineJobException("Unable to find file: " + bam.getPath()); + } + output.setBAM(bam); } - output.setBAM(bam); getWrapper().deleteSymlinks(getWrapper().getLocalFastqDir(outputDirectory)); From 760c35d6f1db4f480eb7dab97655169f0b2b76f9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 25 Aug 2024 14:36:15 -0700 Subject: [PATCH 31/41] Allow CellRangerVDJ to more easily skip BAM creation --- .../org/labkey/singlecell/run/CellRangerVDJWrapper.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git
a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 52e8d8d32..651e3b0f7 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -394,6 +394,9 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp lockFile.delete(); } + boolean discardBam = getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + args.add("--create-bam=" + !discardBam); + getWrapper().execute(args); File outdir = new File(outputDirectory, id); @@ -454,7 +457,6 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome referenceGenome, File outdir, AlignmentOutputImpl output, String subdirName) throws PipelineJobException { boolean isPrimaryDir = "vdj_t".equals(subdirName); - String chainType = "vdj_t".equals(subdirName) ? 
"Alpha/Beta" : "Gamma/Delta"; File multiDir = new File(outdir, "multi/" + subdirName); if (!multiDir.exists()) @@ -487,13 +489,14 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome } } + boolean discardBam = getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); File bam = new File(sampleDir, "all_contig.bam"); - if (!bam.exists()) + if (!discardBam && !bam.exists()) { throw new PipelineJobException("Unable to find file: " + bam.getPath()); } - if (isPrimaryDir) + if (!discardBam && isPrimaryDir) { output.setBAM(bam); } From f5e1eb73eac72552ed03c8268a38faa3cccf3b61 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 25 Aug 2024 15:54:44 -0700 Subject: [PATCH 32/41] Second fix for alignments that don't produce BAMs --- .../pipeline/SequenceAlignmentTask.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index fcbbaad5b..de1045e3f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -692,11 +692,13 @@ private void alignSet(Readset rs, String basename, Map toRetain = Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam)); + List toRetain = bam == null ?
Collections.emptyList() : Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam)); getTaskFileManagerImpl().deleteIntermediateFiles(toRetain); } - _resumer.setInitialAlignmentDone(bam, alignActions); + AlignmentStep alignmentStep = getHelper().getSingleStep(AlignmentStep.class).create(getHelper()); + boolean discardBam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); + _resumer.setInitialAlignmentDone(bam, alignActions, discardBam); } //post-processing @@ -1593,9 +1595,9 @@ public boolean isInitialAlignmentDone() return _mergedBamFile != null; } - public void setInitialAlignmentDone(File mergedBamFile, List actions) throws PipelineJobException + public void setInitialAlignmentDone(File mergedBamFile, List actions, boolean allowNullBam) throws PipelineJobException { - if (mergedBamFile == null) + if (!allowNullBam && mergedBamFile == null) { throw new PipelineJobException("BAM is null"); } From 3a6b43e68f18ee12dfe17d0c591a807d69105ad4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 08:02:02 -0700 Subject: [PATCH 33/41] Fix for alignments that don't produce BAMs --- .../pipeline/SequenceAlignmentTask.java | 23 ++++++++++++++++--- .../run/bampostprocessing/SortSamStep.java | 2 +- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index de1045e3f..ab54beb8e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -675,6 +675,9 @@ else if (pair.first.equals(pair.second)) private void alignSet(Readset rs, String basename, Map> files, ReferenceGenome referenceGenome) throws IOException,
PipelineJobException { + AlignmentStep alignmentStep = getHelper().getSingleStep(AlignmentStep.class).create(getHelper()); + boolean discardBam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false); + File bam; if (_resumer.isInitialAlignmentDone()) { @@ -696,11 +699,26 @@ private void alignSet(Readset rs, String basename, Map implements BamProcessingStep { - public SortSamStep(PipelineStepProvider provider, PipelineContext ctx) + public SortSamStep(PipelineStepProvider provider, PipelineContext ctx) { super(provider, ctx, new SortSamWrapper(ctx.getLogger())); } From 340ee5a95976e453836095c651a68b4facb898f4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 08:38:31 -0700 Subject: [PATCH 34/41] Ensure cellranger-dependent steps retain BAM --- .../run/AbstractCellRangerDependentStep.java | 1 - .../singlecell/run/CellRangerGexCountStep.java | 17 ++++++++++++++--- .../singlecell/run/NimbleAlignmentStep.java | 4 ++++ .../singlecell/run/VelocytoAlignmentStep.java | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java b/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java index 4e8f025d0..e87e1cf8b 100644 --- a/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java @@ -36,7 +36,6 @@ protected File runCellRanger(AlignmentOutputImpl output, Readset rs, List File localBam = new File(outputDirectory, basename + ".cellranger.bam"); File localBamIdx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(localBam); - String idParam = StringUtils.trimToNull(getProvider().getParameterByName("id").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); File cellrangerOutdir = new 
File(outputDirectory, CellRangerWrapper.getId(idParam, rs)); diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index eb397a075..e47ea092b 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -319,6 +319,18 @@ public boolean canAlignMultiplePairsAtOnce() return true; } + private boolean shouldDiscardBam() + { + return !_alwaysRetainBam && getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + } + + private boolean _alwaysRetainBam = false; + + public void setAlwaysRetainBam(boolean alwaysRetainBam) + { + _alwaysRetainBam = alwaysRetainBam; + } + @Override public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException { @@ -359,8 +371,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu File indexDir = AlignerIndexUtil.getIndexDir(referenceGenome, getIndexCachedDirName(getPipelineCtx().getJob())); args.add("--transcriptome=" + indexDir.getPath()); - boolean discardBam = getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); - args.add("--create-bam=" + !discardBam); + args.add("--create-bam=" + !shouldDiscardBam()); getWrapper().setWorkingDir(outputDirectory); @@ -377,7 +388,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu File outdir = new File(outputDirectory, id); outdir = new File(outdir, "outs"); - if (!discardBam) + if (!shouldDiscardBam()) { File bam = new File(outdir, 
"possorted_genome_bam.bam"); if (!bam.exists()) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index ba5044721..04d65c79b 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -67,6 +67,10 @@ public static List getToolParameters() public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException { AlignmentOutputImpl output = new AlignmentOutputImpl(); + + // We need to ensure we keep the BAM for post-processing: + setAlwaysRetainBam(true); + File localBam = runCellRanger(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit); File crDir = new File(localBam.getPath().replace(".nimble.cellranger.bam", "")); diff --git a/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java index 06bcb3c4d..689e92307 100644 --- a/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java @@ -67,6 +67,9 @@ public AlignmentStep create(PipelineContext context) public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException { AlignmentOutputImpl output = new AlignmentOutputImpl(); + + setAlwaysRetainBam(true); + File localBam = runCellRanger(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit); File gtf = 
getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName("gtfFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class)); From beb1bacf6d6152bd4d199a2d96e7e4f0a88a5770 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 09:18:34 -0700 Subject: [PATCH 35/41] Bugfix to BAM-less jobs --- .../labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java | 2 +- .../labkey/singlecell/run/AbstractCellRangerDependentStep.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java index 146bf959c..384d34a93 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java @@ -365,7 +365,7 @@ private void processAnalyses(AnalysisModelImpl analysisModel, int runId, List provider, PipelineContext ctx, CellRangerWrapper wrapper) { super(provider, ctx, wrapper); } From d11cec06dad5f2ef6ca07f07a4c5d041ed3abf78 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 11:34:01 -0700 Subject: [PATCH 36/41] Update CellRanger metrics for BAM-less jobs --- .../run/CellRangerGexCountStep.java | 113 ++---------------- .../singlecell/run/CellRangerVDJWrapper.java | 29 ++--- 2 files changed, 25 insertions(+), 117 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index e47ea092b..927c9c7e6 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -475,7 +475,16 @@ public boolean alwaysCopyIndexToWorkingDir() @Override public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Collection 
outputFilesCreated) throws PipelineJobException { - File metrics = new File(model.getAlignmentFileObject().getParentFile(), "metrics_summary.csv"); + SequenceOutputFile outputForData = outputFilesCreated.stream().filter(x -> LOUPE_CATEGORY.equals(x.getCategory())).findFirst().orElse(null); + if (outputForData == null) + { + outputForData = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow(); + } + + File outsDir = outputForData.getFile().getParentFile(); + Integer dataId = outputForData.getDataId(); + + File metrics = new File(outsDir, "metrics_summary.csv"); if (metrics.exists()) { getPipelineCtx().getLogger().debug("adding 10x metrics"); @@ -501,17 +510,12 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co i++; } - if (model.getAlignmentFile() == null) - { - throw new PipelineJobException("model.getAlignmentFile() was null"); - } - TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics"); //NOTE: if this job errored and restarted, we may have duplicate records: SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset()); filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL); - filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL); + filter.addCondition(FieldKey.fromString("dataid"), dataId, CompareType.EQUAL); filter.addCondition(FieldKey.fromString("category"), "Cell Ranger", CompareType.EQUAL); filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL); TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null); @@ -531,7 +535,7 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co toInsert.put("created", new Date()); toInsert.put("readset", model.getReadset()); toInsert.put("analysis_id", model.getRowId()); 
- toInsert.put("dataid", model.getAlignmentFile()); + toInsert.put("dataid", dataId); toInsert.put("category", "Cell Ranger"); toInsert.put("metricname", header[j]); @@ -593,97 +597,4 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co } } } - - private void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException - { - getPipelineCtx().getLogger().debug("adding 10x metrics"); - - File metrics = new File(outDir, "metrics_summary.csv"); - if (!metrics.exists()) - { - throw new PipelineJobException("Unable to find file: " + metrics.getPath()); - } - - if (model.getAlignmentFile() == null) - { - throw new PipelineJobException("model.getAlignmentFile() was null"); - } - - try (CSVReader reader = new CSVReader(Readers.getReader(metrics))) - { - String[] line; - List metricValues = new ArrayList<>(); - - int i = 0; - while ((line = reader.readNext()) != null) - { - i++; - if (i == 1) - { - continue; - } - - metricValues.add(line); - } - - int totalAdded = 0; - TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics"); - - //NOTE: if this job errored and restarted, we may have duplicate records: - SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset()); - filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL); - filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL); - filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL); - filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL); - TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null); - if (ts.exists()) - { - getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)"); - ts.getArrayList(Integer.class).forEach(rowid -> { - Table.delete(ti, rowid); - }); 
- } - - for (String[] row : metricValues) - { - //TODO - if ("Fastq ID".equals(row[2]) || "Physical library ID".equals(row[2])) - { - continue; - } - - Map toInsert = new CaseInsensitiveHashMap<>(); - toInsert.put("container", getPipelineCtx().getJob().getContainer().getId()); - toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId()); - toInsert.put("created", new Date()); - toInsert.put("readset", model.getReadset()); - toInsert.put("analysis_id", model.getRowId()); - toInsert.put("dataid", model.getAlignmentFile()); - - toInsert.put("category", "Cell Ranger"); - toInsert.put("metricname", row[4]); - - row[5] = row[5].replaceAll(",", ""); //remove commas - Object val = row[5]; - if (row[5].contains("%")) - { - row[5] = row[5].replaceAll("%", ""); - Double d = ConvertHelper.convert(row[5], Double.class); - d = d / 100.0; - val = d; - } - - toInsert.put("metricvalue", val); - - Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert); - totalAdded++; - } - - getPipelineCtx().getLogger().info("total metrics added: " + totalAdded); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } } diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 651e3b0f7..1d973adc3 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -70,6 +70,7 @@ public CellRangerVDJWrapper(@Nullable Logger logger) } public static final String INNER_ENRICHMENT_PRIMERS = "innerEnrichmentPrimers"; + public static final String VLOUPE_CATEGORY = "10x VLoupe"; public static class VDJProvider extends AbstractAlignmentStepProvider { @@ -527,7 +528,7 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome else if (isPrimaryDir) { String versionString = "Version: " + getWrapper().getVersionString(); - 
output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); + output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", VLOUPE_CATEGORY, rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); } output.addIntermediateFile(new File(sampleDir, "airr_rearrangement.tsv")); @@ -678,7 +679,7 @@ public void deleteSymlinks(File localFqDir) throws PipelineJobException } } - public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException + public void addMetrics(File outDir, AnalysisModel model, int dataId) throws PipelineJobException { getPipelineCtx().getLogger().debug("adding 10x metrics"); @@ -688,11 +689,6 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep throw new PipelineJobException("Unable to find file: " + metrics.getPath()); } - if (model.getAlignmentFile() == null) - { - throw new PipelineJobException("model.getAlignmentFile() was null"); - } - try (CSVReader reader = new CSVReader(Readers.getReader(metrics))) { String[] line; @@ -716,7 +712,7 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep //NOTE: if this job errored and restarted, we may have duplicate records: SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset()); filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL); - filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL); + filter.addCondition(FieldKey.fromString("dataid"), dataId, CompareType.EQUAL); filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL); filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL); TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null); @@ -741,7 +737,7 @@ public void addMetrics(File 
outDir, AnalysisModel model) throws PipelineJobExcep toInsert.put("created", new Date()); toInsert.put("readset", model.getReadset()); toInsert.put("analysis_id", model.getRowId()); - toInsert.put("dataid", model.getAlignmentFile()); + toInsert.put("dataid", dataId); toInsert.put("category", "Cell Ranger VDJ"); @@ -784,15 +780,16 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co throw new PipelineJobException("Expected sequence outputs to be created"); } - File html = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow().getFile(); - - addMetrics(html.getParentFile(), model); - - File bam = model.getAlignmentData().getFile(); - if (!bam.exists()) + SequenceOutputFile outputForData = outputFilesCreated.stream().filter(x -> VLOUPE_CATEGORY.equals(x.getCategory())).findFirst().orElse(null); + if (outputForData == null) { - getPipelineCtx().getLogger().warn("BAM not found, expected: " + bam.getPath()); + outputForData = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow(); } + + File outsDir = outputForData.getFile().getParentFile(); + Integer dataId = outputForData.getDataId(); + + addMetrics(outsDir, model, dataId); } private static final Pattern FILE_PATTERN = Pattern.compile("^(.+?)(_S[0-9]+){0,1}_L(.+?)_(R){0,1}([0-9])(_[0-9]+){0,1}(.*?)(\\.f(ast){0,1}q)(\\.gz)?$"); From cb345c17d24e79fae90f0901369b7dad534edf94 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 11:45:06 -0700 Subject: [PATCH 37/41] Bugfix to FeaturePlots.R when tsne not present --- singlecell/resources/chunks/FeaturePlots.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/singlecell/resources/chunks/FeaturePlots.R b/singlecell/resources/chunks/FeaturePlots.R index 1cbc3d20a..31a492ce2 100644 --- a/singlecell/resources/chunks/FeaturePlots.R +++ b/singlecell/resources/chunks/FeaturePlots.R @@ -8,19 +8,19 @@ 
for (datasetId in names(seuratObjects)) { } tryCatch({ - P1 <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = 'tsne', min.cutoff = 'q05', max.cutoff = 'q95') - P2 <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = 'umap', min.cutoff = 'q05', max.cutoff = 'q95') - - if ('wnn.umap' %in% names(seuratObj@reductions)) { - P3 <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = 'wnn.umap', min.cutoff = 'q05', max.cutoff = 'q95') - P1 <- P1 | P2 | P3 - } else { - P1 <- P1 | P2 + reductions <- intersect(c('tsne', 'umap', 'wnn.umap'), names(seuratObj@reductions)) + if (length(reductions) == 0) { + stop('No reductions found to plot') } - P1 <- P1 + patchwork::plot_annotation(title = field) + patchwork::plot_layout(guides = "collect") + plotList <- list() + i <- 0 + for (reduction in reductions) { + i <- i + 1 + plotList[[i]] <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = reduction, min.cutoff = 'q05', max.cutoff = 'q95') + } - print(P1) + print(patchwork::wrap_plots(plotList, ncol = length(reductions)) + patchwork::plot_annotation(title = field) + patchwork::plot_layout(guides = "collect")) }, error = function(e){ print(paste0('Error running FeaturePlots for: ', datasetId)) print(conditionMessage(e)) From 53306870c3f7ae0863082d70c6a881cb8e00b330 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 12:01:23 -0700 Subject: [PATCH 38/41] Add validation to Cell Ranger steps --- .../src/org/labkey/singlecell/run/CellRangerGexCountStep.java | 4 ++++ .../src/org/labkey/singlecell/run/CellRangerVDJWrapper.java | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 927c9c7e6..6981ae11f 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -483,6 +483,10 @@ 
public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co File outsDir = outputForData.getFile().getParentFile(); Integer dataId = outputForData.getDataId(); + if (dataId == null) + { + throw new PipelineJobException("Unable to find dataId for output file"); + } File metrics = new File(outsDir, "metrics_summary.csv"); if (metrics.exists()) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 1d973adc3..aac145d40 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -788,6 +788,10 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co File outsDir = outputForData.getFile().getParentFile(); Integer dataId = outputForData.getDataId(); + if (dataId == null) + { + throw new PipelineJobException("Unable to find dataId for output file"); + } addMetrics(outsDir, model, dataId); } From 93ec686260cce30be81bc707cb47bf9c923499d3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 26 Aug 2024 12:58:23 -0700 Subject: [PATCH 39/41] Change order of steps when no BAM present --- .../pipeline/SequenceAnalysisTask.java | 24 +++++++++---------- .../run/CellRangerGexCountStep.java | 5 ++++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java index 384d34a93..7ad8574c9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java @@ -365,6 +365,18 @@ private void processAnalyses(AnalysisModelImpl analysisModel, int runId, List Date: Mon, 26 Aug 2024 15:37:24 -0700 Subject: [PATCH 40/41] Reduce log level when BAM is discarded --- 
.../pipeline/SequenceAnalysisTask.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java index 7ad8574c9..8881a7d96 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java @@ -380,8 +380,15 @@ private void processAnalyses(AnalysisModelImpl analysisModel, int runId, List Date: Wed, 28 Aug 2024 06:10:49 -0700 Subject: [PATCH 41/41] Walk back create-bam arg in CellRangerVDJWrapper --- .../web/SequenceAnalysis/panel/AnalysisSectionPanel.js | 2 ++ .../org/labkey/singlecell/run/CellRangerVDJWrapper.java | 8 ++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js index 8522f184c..d724dd672 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js @@ -203,6 +203,8 @@ Ext4.define('SequenceAnalysis.panel.AnalysisSectionPanel', { title: 'Add Steps', border: false, width: 800, + autoScroll: true, + maxHeight: '90%', items: items, buttons: [{ text: 'Done', diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index aac145d40..12fa0555a 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -395,9 +395,6 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp lockFile.delete(); } - boolean discardBam = 
getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); - args.add("--create-bam=" + !discardBam); - getWrapper().execute(args); File outdir = new File(outputDirectory, id); @@ -490,14 +487,13 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome } } - boolean discardBam = getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); File bam = new File(sampleDir, "all_contig.bam"); - if (!discardBam && !bam.exists()) + if (!bam.exists()) { throw new PipelineJobException("Unable to find file: " + bam.getPath()); } - if (!discardBam && isPrimaryDir) + if (isPrimaryDir) { output.setBAM(bam); }