Skip to content

Commit

Permalink
Code review comments and added unit tests for stop site calculation.
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad committed Apr 23, 2018
1 parent 9bcfa71 commit e33b5d5
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,21 +152,13 @@ public void apply(List<VariantContext> variantContexts, ReferenceContext referen
*/
@VisibleForTesting
void createIntermediateVariants(SimpleInterval intervalToClose) {
Set<Integer> sitesToStop = new HashSet<>();
resizeReferenceIfNeeded(intervalToClose);

// Break up the GVCF according to the provided reference blocking scheme
if ( multipleAtWhichToBreakBands > 0) {
// if the intermediate interval to close starts before the end of the first interval,
// create the first stop position at the end of the band multiple
for (int blockEndPosition = intervalToClose.getStart() < multipleAtWhichToBreakBands ?
multipleAtWhichToBreakBands :
(intervalToClose.getStart() / multipleAtWhichToBreakBands) * multipleAtWhichToBreakBands;
blockEndPosition <= intervalToClose.getEnd();
blockEndPosition += multipleAtWhichToBreakBands) {
sitesToStop.add(blockEndPosition - 1); // Subtract 1 here because we want to split before this base
}
}
// Note: Precomputing these in a set is really inefficient when large reference blocks are closed with
// fine band resolution because it results in very large list (tens or hundreds of millions) of stop sites
// that must subsequently be sorted.
final Set<Integer> sitesToStop = getIntermediateStopSites(intervalToClose, multipleAtWhichToBreakBands);

// If any variant contexts ended (or were spanning deletions) the last context compute where we should stop them
for (VariantContext vc : variantContextsOverlappingCurrentMerge) {
Expand Down Expand Up @@ -198,6 +190,25 @@ void createIntermediateVariants(SimpleInterval intervalToClose) {

}

// Get any intermediate stop sites based on the break band multiple.
@VisibleForTesting
protected final static Set<Integer> getIntermediateStopSites(final SimpleInterval intervalToClose, final int breakBandMultiple) {
final Set<Integer> sitesToStop = new HashSet<>();

if ( breakBandMultiple > 0) {
// if the intermediate interval to close starts before the end of the first band multiple,
// create the first stop position at the end of the band multiple
for (int blockEndPosition = intervalToClose.getStart() < (breakBandMultiple + 1) ?
Math.max(2, breakBandMultiple) :
(intervalToClose.getStart() / breakBandMultiple) * breakBandMultiple;
blockEndPosition <= intervalToClose.getEnd();
blockEndPosition += breakBandMultiple) {
sitesToStop.add(blockEndPosition - 1); // Subtract 1 here because we want to split before this base
}
}
return sitesToStop;
}

/**
* Resize {@link #storedReferenceContext} to cover at least as much as intervalToClose
* @param intervalToClose
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.broadinstitute.hellbender.tools.walkers;

import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.*;
import java.util.stream.Collectors;

public class CombineGVCFsUnitTest {

@DataProvider(name="breakIntermediateStopSites")
public Object[][] getIntermediateStopSitesData() {
return new Object[][] {
{ new SimpleInterval("contig", 1, 1), 1, Collections.EMPTY_LIST },
{ new SimpleInterval("contig", 1, 2), 1, Arrays.asList(1) },
{ new SimpleInterval("contig", 1, 2), 2, Arrays.asList(1) },
{ new SimpleInterval("contig", 1, 10), 2, Arrays.asList(1, 3, 5, 7, 9) },
{ new SimpleInterval("contig", 1, 10), 5, Arrays.asList(4, 9) },
{ new SimpleInterval("contig", 1, 100), 25, Arrays.asList(24, 49, 74, 99) },

{ new SimpleInterval("contig", 10, 10), 2, Arrays.asList(9) },
{ new SimpleInterval("contig", 10, 10), 5, Arrays.asList(9) },

// TODO ?? is this the correct result ?
{ new SimpleInterval("contig", 10, 10), 10, Arrays.asList(9) },
{ new SimpleInterval("contig", 10, 10), 100, Collections.EMPTY_LIST },

{ new SimpleInterval("contig", 10, 20), 5, Arrays.asList(9, 14, 19) },
{ new SimpleInterval("contig", 10, 20), 10, Arrays.asList(9, 19) },
{ new SimpleInterval("contig", 10, 20), 100, Collections.EMPTY_LIST },

{ new SimpleInterval("contig", 10, 100), 25, Arrays.asList(24, 49, 74, 99) },
{ new SimpleInterval("contig", 10, 100), 50, Arrays.asList(49, 99) },
{ new SimpleInterval("contig", 10, 100), 100, Arrays.asList(99) },
{ new SimpleInterval("contig", 10, 100), 1000, Collections.EMPTY_LIST },
};
}

@Test(dataProvider = "breakIntermediateStopSites")
public void testGetIntermediateStopSites(
final SimpleInterval intervalToClose,
final int breakBandMultiple,
final List<Integer> expectedCloseSites)
{
final List<Integer> actualStopSites = new ArrayList<>(CombineGVCFs.getIntermediateStopSites(intervalToClose, breakBandMultiple));
actualStopSites.sort(Comparator.naturalOrder());
// validate that the resulting stop sites all result in valid single-position stop intervals
actualStopSites.stream().forEach(stopSite -> Assert.assertNotNull(new SimpleInterval(intervalToClose.getContig(), stopSite, stopSite)));
Assert.assertEquals(actualStopSites, expectedCloseSites);
}

}

0 comments on commit e33b5d5

Please sign in to comment.