Skip to content

Commit

Permalink
group methods in StructuralVariationDiscoveryPipelineSpark by functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
SHuang-Broad committed Jul 10, 2018
1 parent c7c8c6e commit ece04d2
Showing 1 changed file with 77 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,6 @@ protected void runTool( final JavaSparkContext ctx ) {

validateParams();

Utils.validate(evidenceAndAssemblyArgs.externalEvidenceFile == null || discoverStageArgs.cnvCallsFile == null,
"Please only specify one of externalEvidenceFile or cnvCallsFile");

if (discoverStageArgs.cnvCallsFile != null) {
evidenceAndAssemblyArgs.externalEvidenceFile = discoverStageArgs.cnvCallsFile;
}

JavaRDD<GATKRead> unfilteredReads = getUnfilteredReads();
final SAMFileHeader headerForReads = getHeaderForReads();

Expand Down Expand Up @@ -206,9 +199,18 @@ protected void runTool( final JavaSparkContext ctx ) {
}
}

// init ============================================================================================================

/**
 * Checks the tool arguments for internal consistency.
 * Each argument collection first runs its own validation; then the two
 * mutually exclusive evidence inputs are checked, and a supplied CNV calls
 * file is forwarded as the external evidence file.
 */
private void validateParams() {
    // delegate to each argument collection's own checks first
    evidenceAndAssemblyArgs.validate();
    discoverStageArgs.validate();

    // the two evidence inputs are mutually exclusive (De Morgan form of the original check)
    final boolean bothSpecified =
            evidenceAndAssemblyArgs.externalEvidenceFile != null && discoverStageArgs.cnvCallsFile != null;
    Utils.validate(!bothSpecified, "Please only specify one of externalEvidenceFile or cnvCallsFile");

    // when CNV calls are given, they serve as the external evidence input downstream
    if (discoverStageArgs.cnvCallsFile != null) {
        evidenceAndAssemblyArgs.externalEvidenceFile = discoverStageArgs.cnvCallsFile;
    }
}

private SvDiscoveryInputMetaData getSvDiscoveryInputData(final JavaSparkContext ctx,
Expand All @@ -234,49 +236,43 @@ private SvDiscoveryInputMetaData getSvDiscoveryInputData(final JavaSparkContext
cnvCallsBroadcast, getHeaderForReads(), getReference(), localLogger);
}

// NOTE(review): GitHub diff residue — deleted lines of the old processEvidenceTargetLinks()
// are interleaved here with added lines of the relocated broadcastCNVCalls() and
// makeEvidenceLinkTree(); this span is not valid standalone Java and is kept verbatim.
/**
* Uses the input EvidenceTargetLinks to
* <ul>
* <li>
* either annotate the variants called from assembly discovery with split read and read pair evidence, or
* </li>
* <li>
* to call new imprecise variants if the number of pieces of evidence exceeds a given threshold.
* </li>
* </ul>
*
*/
private static List<VariantContext> processEvidenceTargetLinks(List<VariantContext> assemblyBasedVariants,
final SvDiscoveryInputMetaData svDiscoveryInputMetaData) {

final List<VariantContext> annotatedVariants;
if (svDiscoveryInputMetaData.getSampleSpecificData().getEvidenceTargetLinks() != null) {
final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTargetLinks = svDiscoveryInputMetaData.getSampleSpecificData().getEvidenceTargetLinks();
final ReadMetadata readMetadata = svDiscoveryInputMetaData.getSampleSpecificData().getReadMetadata();
final ReferenceMultiSource reference = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast().getValue();
final VariantsDiscoveryFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = svDiscoveryInputMetaData.getDiscoverStageArgs();
final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger();
// NOTE(review): added hunk — the relocated broadcastCNVCalls() begins here, mid-way through the old method body.
public static Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls(final JavaSparkContext ctx,
final SAMFileHeader header,
final String cnvCallsFile) {
final SVIntervalTree<VariantContext> cnvCalls;
if (cnvCallsFile != null) {
cnvCalls = CNVInputReader.loadCNVCalls(cnvCallsFile, header);
} else {
cnvCalls = null;
}

// annotate with evidence links
annotatedVariants = AnnotatedVariantProducer.
annotateBreakpointBasedCallsWithImpreciseEvidenceLinks(assemblyBasedVariants,
evidenceTargetLinks, readMetadata, reference, discoverStageArgs, toolLogger);
final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls;
if (cnvCalls != null) {
broadcastCNVCalls = ctx.broadcast(cnvCalls);
} else {
broadcastCNVCalls = null;
}
return broadcastCNVCalls;
}

// then also imprecise deletion
final List<VariantContext> impreciseVariants = ImpreciseVariantDetector.
callImpreciseDeletionFromEvidenceLinks(evidenceTargetLinks, readMetadata, reference,
discoverStageArgs.impreciseVariantEvidenceThreshold,
discoverStageArgs.maxCallableImpreciseVariantDeletionSize,
toolLogger);
// NOTE(review): added hunk — the relocated makeEvidenceLinkTree() javadoc and signature begin here.
/**
* Makes a PairedStrandedIntervalTree from a list of EvidenceTargetLinks. The value of each entry in the resulting tree
* will be the original EvidenceTargetLink. If the input list is null, returns a null tree.
*/
private PairedStrandedIntervalTree<EvidenceTargetLink> makeEvidenceLinkTree(final List<EvidenceTargetLink> evidenceTargetLinks) {
final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceLinkTree;

annotatedVariants.addAll(impreciseVariants);
if (evidenceTargetLinks != null) {
evidenceLinkTree = new PairedStrandedIntervalTree<>();
evidenceTargetLinks.forEach(l -> evidenceLinkTree.put(l.getPairedStrandedIntervals(), l));
} else {
annotatedVariants = assemblyBasedVariants;
evidenceLinkTree = null;
}

return annotatedVariants;
return evidenceLinkTree;
}

// interpretation: assembly-based ==================================================================================

private static void experimentalInterpretation(final JavaSparkContext ctx,
final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults,
final SvDiscoveryInputMetaData svDiscoveryInputMetaData) {
Expand Down Expand Up @@ -316,21 +312,50 @@ private static JavaRDD<GATKRead> getContigRawAlignments(final JavaSparkContext c
}

// NOTE(review): GitHub diff residue — deleted lines of the old makeEvidenceLinkTree()
// are interleaved here with added lines of the relocated processEvidenceTargetLinks();
// this span is not valid standalone Java and is kept verbatim.
/**
* Makes a PairedStrandedIntervalTree from a list of EvidenceTargetLinks. The value of each entry in the resulting tree
* will be the original EvidenceTargetLink. If the input list is null, returns a null tree.
* Uses the input EvidenceTargetLinks to
* <ul>
* <li>
* either annotate the variants called from assembly discovery with split read and read pair evidence, or
* </li>
* <li>
* to call new imprecise variants if the number of pieces of evidence exceeds a given threshold.
* </li>
* </ul>
*
*/
private PairedStrandedIntervalTree<EvidenceTargetLink> makeEvidenceLinkTree(final List<EvidenceTargetLink> evidenceTargetLinks) {
final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceLinkTree;
// NOTE(review): added hunk — the relocated processEvidenceTargetLinks() signature begins here.
private static List<VariantContext> processEvidenceTargetLinks(List<VariantContext> assemblyBasedVariants,
final SvDiscoveryInputMetaData svDiscoveryInputMetaData) {

if (evidenceTargetLinks != null) {
evidenceLinkTree = new PairedStrandedIntervalTree<>();
evidenceTargetLinks.forEach(l -> evidenceLinkTree.put(l.getPairedStrandedIntervals(), l));
final List<VariantContext> annotatedVariants;
if (svDiscoveryInputMetaData.getSampleSpecificData().getEvidenceTargetLinks() != null) {
final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTargetLinks = svDiscoveryInputMetaData.getSampleSpecificData().getEvidenceTargetLinks();
final ReadMetadata readMetadata = svDiscoveryInputMetaData.getSampleSpecificData().getReadMetadata();
final ReferenceMultiSource reference = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast().getValue();
final VariantsDiscoveryFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = svDiscoveryInputMetaData.getDiscoverStageArgs();
final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger();

// annotate with evidence links
annotatedVariants = AnnotatedVariantProducer.
annotateBreakpointBasedCallsWithImpreciseEvidenceLinks(assemblyBasedVariants,
evidenceTargetLinks, readMetadata, reference, discoverStageArgs, toolLogger);

// then also imprecise deletion
final List<VariantContext> impreciseVariants = ImpreciseVariantDetector.
callImpreciseDeletionFromEvidenceLinks(evidenceTargetLinks, readMetadata, reference,
discoverStageArgs.impreciseVariantEvidenceThreshold,
discoverStageArgs.maxCallableImpreciseVariantDeletionSize,
toolLogger);

annotatedVariants.addAll(impreciseVariants);
} else {
evidenceLinkTree = null;
annotatedVariants = assemblyBasedVariants;
}
return evidenceLinkTree;

return annotatedVariants;
}

// parser ==========================================================================================================

public static final class InMemoryAlignmentParser extends AlignedContigGenerator implements Serializable {
private static final long serialVersionUID = 1L;

Expand Down Expand Up @@ -466,23 +491,4 @@ public JavaRDD<AlignedContig> getAlignedContigs() {
}
}

/**
 * Loads CNV calls from {@code cnvCallsFile}, if one was supplied, and broadcasts
 * the resulting interval tree to the cluster.
 *
 * @param ctx          Spark context used to create the broadcast
 * @param header       SAM file header used when reading the CNV calls file
 * @param cnvCallsFile path to the CNV calls file; may be null
 * @return a broadcast of the loaded {@code SVIntervalTree}, or null when no file was given
 */
public static Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls(final JavaSparkContext ctx,
                                                                          final SAMFileHeader header,
                                                                          final String cnvCallsFile) {
    // load only when a file is provided; otherwise there is nothing to broadcast
    final SVIntervalTree<VariantContext> cnvCalls =
            cnvCallsFile == null ? null : CNVInputReader.loadCNVCalls(cnvCallsFile, header);
    return cnvCalls == null ? null : ctx.broadcast(cnvCalls);
}

}

0 comments on commit ece04d2

Please sign in to comment.