diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js index 117296416..0b125221c 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js @@ -214,7 +214,7 @@ Ext4.define('SequenceAnalysis.panel.AnalysisSectionPanel', { }], listeners: { show: function(win){ - if (win.getHeight() > Ext4.getBody().getHeight()) { + if (win.getHeight() > window.visualViewport.height) { win.alignTo(Ext4.getBody(), 't-t?'); } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index ac5215794..9b4cfb4c5 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -302,6 +302,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider()); SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider()); SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider()); + SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider()); SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider()); //handlers diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 8a460ce6f..b254cb86d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -707,7 +707,7 @@ private void alignSet(Readset rs, String basename, Map getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task createTask(PipelineJob job) { SequenceAnalysisTask task = new SequenceAnalysisTask(this, job); setJoin(true); @@ -202,7 +202,7 @@ public RecordedActionSet run() throws PipelineJobException //find BAM List datas = run.getInputDatas(SequenceAlignmentTask.FINAL_BAM_ROLE, ExpProtocol.ApplicationType.ExperimentRunOutput); - if (datas.size() > 0) + if (!datas.isEmpty()) { boolean found = false; for (ExpData d : datas) @@ -261,7 +261,7 @@ else if (!d.getFile().exists()) else { List fastaDatas = run.getInputDatas(IndexOutputImpl.REFERENCE_DB_FASTA, null); - if (fastaDatas.size() > 0) + if (!fastaDatas.isEmpty()) { for (ExpData d : fastaDatas) { @@ -289,7 +289,7 @@ else if (d.getFile().exists()) //input FASTQs datas = run.getInputDatas(SequenceTaskHelper.FASTQ_DATA_INPUT_NAME, ExpProtocol.ApplicationType.ProtocolApplication); - if (datas.size() > 0) + if (!datas.isEmpty()) { for (ExpData d : datas) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFixploidyStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFixploidyStep.java new file mode 100644 index 000000000..f5c7cf153 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFixploidyStep.java @@ -0,0 +1,110 @@ +package org.labkey.sequenceanalysis.run.analysis; + +import htsjdk.samtools.util.Interval; +import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; +import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class BcftoolsFixploidyStep extends AbstractCommandPipelineStep implements VariantProcessingStep +{ + public BcftoolsFixploidyStep(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new BcftoolsRunner(ctx.getLogger())); + } + + public static class Provider extends AbstractVariantProcessingStepProvider implements SupportsScatterGather + { + public Provider() + { + super("BcftoolsFixploidyStep", "Bcftools Fixploidy", "bcftools", "Can be used to convert single dot ('.') genotypes to './.' for compatibility with some tools.", Arrays.asList( + + ), null, null); + } + + @Override + public BcftoolsFixploidyStep create(PipelineContext ctx) + { + return new BcftoolsFixploidyStep(this, ctx); + } + } + + @Override + public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException + { + VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); + + List options = new ArrayList<>(); + options.add(BcftoolsRunner.getBcfToolsPath().getPath()); + options.add("+fixploidy"); + + options.add(inputVCF.getPath()); + + if (intervals != null) + { + options.add("--regions"); + options.add(intervals.stream().map(interval -> interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()).collect(Collectors.joining(","))); + } + + options.add("-O"); + options.add("z9"); + + Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); + if (threads != null) + { + options.add("--threads"); + options.add(threads.toString()); + } + + File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".ft.vcf.gz"); + options.add("-o"); + options.add(outputVcf.getPath()); + + options.add("--"); + + BcftoolsRunner wrapper = getWrapper(); + + String bcfPluginDir = StringUtils.trimToNull(System.getenv("BCFTOOLS_PLUGINS")); + if (bcfPluginDir != null) + { + getPipelineCtx().getLogger().debug("Setting BCFTOOLS_PLUGINS environment variable: " + bcfPluginDir); + wrapper.addToEnvironment("BCFTOOLS_PLUGINS", bcfPluginDir); + } + + wrapper.execute(options); + if (!outputVcf.exists()) + { + throw new PipelineJobException("output not found: " + outputVcf); + } + + try + { + SequenceAnalysisService.get().ensureVcfIndex(outputVcf, getWrapper().getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + output.setVcf(outputVcf); + + return output; + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java index a35d003b6..0fbd9a075 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java @@ -1,5 +1,6 @@ package org.labkey.sequenceanalysis.util; +import com.google.common.io.Files; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.SAMReadGroupRecord; @@ -514,13 +515,23 @@ else if (!samples.equals(header.getGenotypeSamples())) writer.write("} | bgzip -f" + (compressionLevel == null ? "" : " --compress-level 9") + (threads == null ? "" : " --threads " + threads) + " > '" + outputGzip.getPath() + "'\n"); } - SimpleScriptWrapper wrapper = new SimpleScriptWrapper(log); - wrapper.execute(Arrays.asList("/bin/bash", bashTmp.getPath())); - - if (sortAfterMerge) + File mergeDone = new File(outputGzip.getParentFile(), "merge.done"); + if (mergeDone.exists()) { - log.debug("sorting VCF"); - sortROD(outputGzip, log, 2); + log.debug("Merge done file exists, will not repeat merge"); + } + else + { + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(log); + wrapper.execute(Arrays.asList("/bin/bash", bashTmp.getPath())); + + if (sortAfterMerge) + { + log.debug("sorting VCF"); + sortROD(outputGzip, log, 2); + } + + Files.touch(mergeDone); } SequenceAnalysisService.get().ensureVcfIndex(outputGzip, log); @@ -539,6 +550,8 @@ else if (!samples.equals(header.getGenotypeSamples())) { headerIdx.delete(); } + + mergeDone.delete(); } catch (IOException e) { diff --git a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx index 594bce952..1fe878691 100644 --- a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx +++ b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx @@ -208,7 +208,7 @@ export default jbrowse => { const gt = samples[sample]["GT"] for (let genotype of gt){ const nc = "No Call" - if (genotype === "./." || genotype === ".|."){ + if (genotype === "./." || genotype === ".|." || genotype === '.'){ gtCounts[nc] = gtCounts[nc] ? gtCounts[nc] + 1 : 1 gtTotal = gtTotal + 1 } diff --git a/jbrowse/src/client/JBrowse/utils.ts b/jbrowse/src/client/JBrowse/utils.ts index de51e775e..e69b0b3e7 100644 --- a/jbrowse/src/client/JBrowse/utils.ts +++ b/jbrowse/src/client/JBrowse/utils.ts @@ -80,7 +80,7 @@ export function passesSampleFilters(feature, sampleIDs){ } export function isVariant(gt) { - return !(gt === "./." || gt === ".|." || gt === "0/0" || gt === "0|0") + return !(gt === "./." || gt === ".|." || gt === '.' || gt === "0/0" || gt === "0|0") } diff --git a/singlecell/resources/chunks/Functions.R b/singlecell/resources/chunks/Functions.R index 622e88e48..71b18e449 100644 --- a/singlecell/resources/chunks/Functions.R +++ b/singlecell/resources/chunks/Functions.R @@ -81,7 +81,7 @@ saveData <- function(seuratObj, datasetId) { logger::log_info(paste0('Saving RDS file: ', fn, ' with ', ncol(seuratObj), ' cells')) barcodeFile <- paste0(outputPrefix, '.', datasetIdForFile, '.cellBarcodes.csv') - metaFile <- paste0(outputPrefix, '.', datasetIdForFile, '.seurat.meta.txt') + metaFile <- paste0(outputPrefix, '.', datasetIdForFile, '.seurat.meta.txt.gz') saveRDS(seuratObj, file = fn) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 85355473f..2675311c2 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1573,7 +1573,7 @@ public File getMetaTableFromSeurat(File seuratObj) public File getMetaTableFromSeurat(File seuratObj, boolean throwIfNotFound) { - File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt")); + File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz")); if (throwIfNotFound && !barcodes.exists()) { throw new IllegalArgumentException("Unable to find expected metadata file. This might indicate the seurat object was created with an older version of the pipeline. Expected: " + barcodes.getPath()); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/FilterRawCounts.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/FilterRawCounts.java index e125709b4..7a42c13c1 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/FilterRawCounts.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/FilterRawCounts.java @@ -25,13 +25,13 @@ public Provider() }}, 0, "nCount_RNA.low", false), SeuratToolParameter.create("nCountRnaHigh", "Max UMI Count", "Cells with UMI counts above this value will be discarded", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); - }}, 20000, "nCount_RNA.high", false), + }}, 30000, "nCount_RNA.high", false), SeuratToolParameter.create("nCountFeatureLow", "Min Feature Count", "Cells with unique feature totals below this value will be discarded", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); - }}, 200, "nFeature.low", false), + }}, 150, "nFeature.low", false), SeuratToolParameter.create("nCountFeatureHigh", "Max Feature Count", "Cells with unique feature totals above this value will be discarded", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); - }}, 5000, "nFeature.high", false), + }}, 7500, "nFeature.high", false), SeuratToolParameter.create("pMitoLow", "Min Percent Mito", "Cells percent mitochondrial genes below this value will be discarded", "ldk-numberfield", new JSONObject(){{ put("minValue", 0); put("maxValue", 1); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PrepareRawCounts.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PrepareRawCounts.java index 874eecf00..f317fd90e 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PrepareRawCounts.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PrepareRawCounts.java @@ -39,10 +39,10 @@ public Provider() }}, 0.001), SeuratToolParameter.create("maxAllowableCells", "Max Cells Allowed", "If more than this many cells are predicted by EmptyDrops, the job will fail", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); - }}, 20000), + }}, 35000), SeuratToolParameter.create("minAllowableCells", "Min Cells Allowed", "If fewer than this many cells are predicted by EmptyDrops, the job will fail", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); - }}, 1500), + }}, 500), SeuratToolParameter.create("useEmptyDropsCellRanger", "Use emptyDropsCellRanger", "If checked, this will run emptyDropsCellRanger instead of emptyDrops", "checkbox", null, false), SeuratToolParameter.create("nExpectedCells", "# Expected Cells", "Only applied if emptyDropsCellRanger is selected. Passed to n.expected.cells argument", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunConga.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunConga.java index ca1711558..e6a2a4622 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunConga.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunConga.java @@ -37,7 +37,7 @@ public Provider() }}, "conga"), SeuratToolParameter.create("fieldToIterate", "Field to Iterate", "If provided, in addition to running CoNGA for the entire dataset, it will iterate the values of this field, subset the data by this value, and run CoNGA on that subset. The resulting metadata will be saved with the field name pattern: {congaMetadataPrefix}{FieldValue}", "textfield", new JSONObject(){{ - }}, "SubjectId", "fieldToIterate", true), + }}, null, "fieldToIterate", true), SeuratToolParameter.create("assayName", "Assay Name", "The name of the assay holding the GEX data", "textfield", new JSONObject(){{ }}, "RNA"), diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java index d44f48a40..690891471 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java @@ -15,6 +15,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.singlecell.pipeline.SeuratToolParameter; import org.labkey.api.singlecell.pipeline.SingleCellStep; +import org.labkey.singlecell.CellHashingServiceImpl; import org.labkey.singlecell.SingleCellSchema; import java.io.File; @@ -140,6 +141,24 @@ public Output execute(SequenceOutputHandler.JobContext ctx, List getCellRangerGexParams(@Nullable Lis ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--chemistry"), "chemistry", "Chemistry", "This is usually left blank, in which case cellranger will auto-detect. Example values are: SC3Pv1, SC3Pv2, SC3Pv3, SC5P-PE, SC5P-R2, or SC5P-R1", "textfield", new JSONObject(){{ }}, null), - ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--include-introns"), "includeIntrons", "Include Introns", "If selected, reads from introns will be included in the counts", "ldk-simplecombo", new JSONObject(){{ + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--include-introns"), "includeIntrons", "Include Introns", "If selected, reads from introns will be included in the counts", "ldk-simplecombo", new JSONObject(){{ put("storeValues", "true;false"); - put("value", "false"); - }}, null) + put("value", "true"); + }}, "true") ); if (additionalParams != null) @@ -321,6 +322,12 @@ public boolean canAlignMultiplePairsAtOnce() private boolean shouldDiscardBam() { + // NOTE: if downstream analyses are selected, always keep BAM + if (!SequencePipelineService.get().getSteps(getPipelineCtx().getJob(), AnalysisStep.class).isEmpty()) + { + return false; + } + return !_alwaysRetainBam && getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); }