Skip to content

Commit

Permalink
Add tribble test data, clean up test suite
Browse files Browse the repository at this point in the history
Fix bug in LinearIndex related to improper commenting/if-statement
  • Loading branch information
jacobbroad committed Jan 8, 2013
1 parent f9fa912 commit e8c848f
Show file tree
Hide file tree
Showing 40 changed files with 6,881 additions and 113 deletions.
10 changes: 6 additions & 4 deletions build-tribble.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@
<project name="tribble" basedir="." default="all">

<property name="src" value="src"/>
<property name="src.test" value="tests/java"/>
<property name="src.test" value="${src}/tests/java"/>

<property name="lib" value="lib"/>
<property name="dist" value="dist"/>
<property name="classes" value="classes"/>
<property name="classes.test" value="testclasses"/>
<property name="reports" value="dist/test"/>
<property name="javac.debug" value="true"/>
<property name="javac.target" value="1.6"/>
<property name="tribble-version" value="0.2"/>
<property name="tribble-version" value="0"/>
<property name="singleTest" value="*" /> <!-- used to determine what tests to run -->
<!-- INIT -->
<target name="init">
Expand Down Expand Up @@ -76,9 +77,10 @@
<javac destdir="${classes.test}"
debug="${javac.debug}"
target="${javac.target}"
source="${javac.target}">
source="${javac.target}"
srcdir="${src.test}">
<compilerarg value="-Xlint:all"/>
<src path="${src.test}"/>
<include name="org/broad/tribble/**/*"/>
<classpath>
<path refid="classpath"/>
<pathelement location="${classes}"/>
Expand Down
80 changes: 0 additions & 80 deletions src/java/org/broad/tribble/index/IndexFactoryTest.java

This file was deleted.

23 changes: 1 addition & 22 deletions src/java/org/broad/tribble/index/linear/LinearIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@

public class LinearIndex extends AbstractIndex implements Index {

//Going to remove logging from tribble
@Deprecated
private final static boolean DEBUG = false;

// NOTE: For debugging, this limit can be overridden via the MAX_FEATURES_PER_BIN system property (e.g. -DMAX_FEATURES_PER_BIN=50); it defaults to 100.
public static final double MAX_FEATURES_PER_BIN = Double.valueOf(System.getProperty("MAX_FEATURES_PER_BIN", "100"));

Expand Down Expand Up @@ -314,9 +310,6 @@ private static boolean badBinWidth(ChrIndex idx) {
if (idx.binWidth > MAX_BIN_WIDTH || idx.binWidth < 0) // an overflow occurred
return true;
else if (MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX != 0 && idx.getNFeatures() > 1 && idx.binWidth > MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX) {
// if ( DEBUG )
// log.debug(String.format("No longer merging up bins on %s with %d features as binWidth %d > max %d for occupied indices",
// idx.getName(), idx.getNFeatures(), idx.binWidth, MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX));
return true;
} else {
return false;
Expand All @@ -328,10 +321,6 @@ private static ChrIndex optimize(ChrIndex idx, double threshold, int level) {

while (true) {
double score = idx.optimizeScore();
if ( DEBUG )
//log.debug(String.format(" %s%6s with %8d bins of size %6d: feature size est. is %.2f, features per block %.5f, most dense %.5f, score %.5f",
// dupString(' ', level * 2), idx.getName(), idx.getNBlocks(), idx.binWidth, idx.getAverageFeatureSize(),
// idx.getFeaturesPerBlock(), idx.getNFeaturesOfMostDenseBlock(idx.getAverageFeatureSize()), score));

if (score > threshold || idx.getNBlocks() == 1 || badBinWidth(idx))
break;
Expand Down Expand Up @@ -387,24 +376,14 @@ private static String dupString(char c, int nCopies) {
// ----------------------------------------------------------------------------------------------------
public Index optimize(double threshold) {
if (enableAdaptiveIndexing) {
// if ( DEBUG )
// log.debug("Adaptive optimization of " + this.indexedFile + " with threshold " + threshold);
// if ( DEBUG && log.isDebugEnabled())
// printIndexInfo();

List<ChrIndex> newIndices = new ArrayList<ChrIndex>(this.chrIndices.size());
for (String name : chrIndices.keySet()) {
LinearIndex.ChrIndex oldIdx = (LinearIndex.ChrIndex) chrIndices.get(name);
LinearIndex.ChrIndex newIdx = oldIdx.optimize(threshold);
newIndices.add(newIdx);
}

LinearIndex newIndex = new LinearIndex(this, newIndices);

// if ( DEBUG )
// log.debug(String.format("Old index %s vs. new %s", this.statsSummary(), newIndex.statsSummary()));

return newIndex;
return new LinearIndex(this, newIndices);
} else {
return this;
}
Expand Down
55 changes: 55 additions & 0 deletions src/tests/java/org/broad/tribble/BinaryFeaturesTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.broad.tribble;

import org.broad.tribble.bed.BEDCodec;
import org.broad.tribble.example.ExampleBinaryCodec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.*;
import java.util.Iterator;
import java.util.List;


/**
 * Round-trip test for {@link ExampleBinaryCodec}: each BED source file is converted with
 * {@code ExampleBinaryCodec.convertToBinaryTest} and the resulting file is read back and compared,
 * feature by feature, against the original text file.
 */
public class BinaryFeaturesTest {

    /**
     * Supplies (source file, codec) pairs for {@link #testBinaryCodec(File, FeatureCodec)}.
     *
     * @return one row per BED fixture under {@code TestUtils.DATA_DIR}, each paired with a fresh {@link BEDCodec}
     */
    @DataProvider(name = "BinaryFeatureSources")
    public Object[][] createData1() {
        return new Object[][] {
                { new File(TestUtils.DATA_DIR + "test.bed"), new BEDCodec() },
                { new File(TestUtils.DATA_DIR + "bed/Unigene.sample.bed"), new BEDCodec() },
                { new File(TestUtils.DATA_DIR + "bed/NA12878.deletions.10kbp.het.gq99.hand_curated.hg19_fixed.bed"), new BEDCodec() },
        };
    }

    /**
     * Converts {@code source} into a temporary binary file and checks that the binary reader yields
     * the expected header and the same sequence of (chr, start, end) triples as the original reader.
     *
     * @param source the text feature file to convert
     * @param codec  the codec used to decode {@code source}
     * @throws IOException if the temp file cannot be created or a reader fails
     */
    @Test(enabled = true, dataProvider = "BinaryFeatureSources")
    public void testBinaryCodec(final File source, final FeatureCodec<Feature> codec) throws IOException {
        final File tmpFile = File.createTempFile("testBinaryCodec", ".binary.bed");
        // Register cleanup before doing any work so the temp file is removed even if the conversion throws.
        tmpFile.deleteOnExit();
        ExampleBinaryCodec.convertToBinaryTest(source, tmpFile, codec);

        final FeatureReader<Feature> originalReader =
                AbstractFeatureReader.getFeatureReader(source.getAbsolutePath(), codec, false);
        try {
            final FeatureReader<Feature> binaryReader =
                    AbstractFeatureReader.getFeatureReader(tmpFile.getAbsolutePath(), new ExampleBinaryCodec(), false);
            try {
                // make sure the header is what we expect
                // The cast mirrors the original test, which treats the binary codec's header as a list of lines.
                @SuppressWarnings("unchecked")
                final List<String> header = (List<String>) binaryReader.getHeader();
                Assert.assertEquals(header.size(), 1, "We expect exactly one header line");
                Assert.assertEquals(header.get(0), ExampleBinaryCodec.HEADER_LINE, "Failed to read binary header line");

                final Iterator<Feature> oit = originalReader.iterator();
                final Iterator<Feature> bit = binaryReader.iterator();
                while (oit.hasNext()) {
                    final Feature of = oit.next();

                    Assert.assertTrue(bit.hasNext(), "Original iterator has items, but there's no items left in binary iterator");
                    final Feature bf = bit.next();

                    Assert.assertEquals(bf.getChr(), of.getChr(), "Chr not equal between original and binary encoding");
                    Assert.assertEquals(bf.getStart(), of.getStart(), "Start not equal between original and binary encoding");
                    Assert.assertEquals(bf.getEnd(), of.getEnd(), "End not equal between original and binary encoding");
                }
                Assert.assertFalse(bit.hasNext(), "Original iterator is done, but there's still some data in binary iterator");
            } finally {
                // Close even when an assertion fails so file handles are not leaked across data-provider rows.
                binaryReader.close();
            }
        } finally {
            originalReader.close();
        }
    }
}
163 changes: 163 additions & 0 deletions src/tests/java/org/broad/tribble/FeatureReaderTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package org.broad.tribble;

import net.sf.samtools.seekablestream.SeekableFileStream;
import org.broad.tribble.bed.BEDCodec;
import org.broad.tribble.example.ExampleBinaryCodec;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.ParsingUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;


/**
 * Exercises {@link AbstractFeatureReader} queries against the same BED content stored as text,
 * as the {@link ExampleBinaryCodec} binary form, and as a tabix-compressed file, using each
 * index type listed in {@link #createData1()}.
 */
public class FeatureReaderTest {
    private final static File asciiBedFile = new File(TestUtils.DATA_DIR + "test.bed");
    private final static File binaryBedFile = new File(TestUtils.DATA_DIR + "test.binary.bed");
    private final static File tabixBedFile = new File(TestUtils.DATA_DIR + "test.tabix.bed.gz");

    /**
     * Generates the binary fixture from the text BED file before any test runs.
     *
     * @throws IOException if the conversion fails
     */
    @BeforeClass
    public void setup() throws IOException {
        ExampleBinaryCodec.convertToBinaryTest(asciiBedFile, binaryBedFile, new BEDCodec());
    }

    /**
     * Currently a no-op: deletion of the generated binary fixture is disabled.
     * NOTE(review): this leaves test.binary.bed behind in the data directory after the run;
     * confirm whether that is intentional before re-enabling the delete.
     */
    @AfterClass
    public void teardown() throws Exception {
        // binaryBedFile.delete();
    }

    /**
     * Supplies (feature file, index type, codec) triples shared by the data-driven tests.
     *
     * @return one row per file/index-type combination under test
     */
    @DataProvider(name = "indexProvider")
    public Object[][] createData1() {
        return new Object[][]{
                {asciiBedFile, IndexFactory.IndexType.LINEAR, new BEDCodec()},
                {asciiBedFile, IndexFactory.IndexType.INTERVAL_TREE, new BEDCodec()},
                {tabixBedFile, IndexFactory.IndexType.TABIX, new BEDCodec()},
                {binaryBedFile, IndexFactory.IndexType.LINEAR, new ExampleBinaryCodec()},
                {binaryBedFile, IndexFactory.IndexType.INTERVAL_TREE, new ExampleBinaryCodec()},
        };
    }

    /**
     * Runs a fixed set of interval queries and checks the number of records returned by each.
     * The expected counts are specific to the test.bed fixture.
     */
    @Test(dataProvider = "indexProvider")
    public void testBedQuery(final File featureFile, IndexFactory.IndexType indexType, FeatureCodec<Feature> codec) throws IOException {
        final AbstractFeatureReader<Feature> reader = getReader(featureFile, indexType, codec);
        try {
            // Query
            testQuery(reader, "chr1", 1, 500, 3);
            testQuery(reader, "chr1", 1, 200, 1);
            testQuery(reader, "chr1", 1, 201, 2);
            testQuery(reader, "chr1", 500, 600, 0);
            testQuery(reader, "chr1", 100000, 100010, 1);
            testQuery(reader, "chr1", 100000, 100000, 0);
            testQuery(reader, "chr1", 100001, 100001, 1);
            testQuery(reader, "chr1", 100005, 100006, 1);
            testQuery(reader, "chr1", 100009, 100011, 1);
            testQuery(reader, "chr1", 100010, 100010, 1);
            testQuery(reader, "chr1", 100011, 100011, 0);
            testQuery(reader, "chr2", 1, 100, 2);
            testQuery(reader, "chr2", 1, 10, 1);
            testQuery(reader, "chr2", 15, 16, 0);
            testQuery(reader, "chr3", 1, 201, 0);
        } finally {
            // Close reader even if a query assertion failed
            reader.close();
        }
    }

    /**
     * Issues many short queries in a row (2000 rounds x 5 sites x 3 chromosomes), closing each
     * iterator immediately, and checks that every query returns a non-null iterator.
     */
    @Test(dataProvider = "indexProvider")
    public void testLargeNumberOfQueries(final File featureFile, IndexFactory.IndexType indexType, FeatureCodec<Feature> codec) throws IOException {
        final AbstractFeatureReader<Feature> reader = getReader(featureFile, indexType, codec);
        try {
            final List<Integer> sites = Arrays.asList(500, 200, 201, 600, 100000);
            final List<String> chromosomes = Arrays.asList("chr1", "chr2", "chr3");
            for (int i = 0; i < 2000; i++) {
                for (final int start : sites) {
                    final int end = start + 1; // query so we find something
                    for (final String chr : chromosomes) {
                        final CloseableTribbleIterator<Feature> iter = reader.query(chr, start, end);
                        Assert.assertNotNull(iter, "Failed to create non-null iterator");
                        iter.close();
                    }
                }
            }
        } finally {
            // Close reader
            reader.close();
        }
    }

    /**
     * Queries {@code chr:start-stop}, asserts that every returned feature overlaps the interval,
     * and asserts that exactly {@code expectedNumRecords} features come back.
     *
     * @param reader             reader to query
     * @param chr                sequence name
     * @param start              query start
     * @param stop               query end
     * @param expectedNumRecords number of features the query must return
     * @throws IOException if the query fails
     */
    private void testQuery(AbstractFeatureReader<Feature> reader, final String chr, int start, int stop, int expectedNumRecords) throws IOException {
        final CloseableTribbleIterator<Feature> iter = reader.query(chr, start, stop);
        try {
            int count = 0;
            while (iter.hasNext()) {
                final Feature f = iter.next();
                Assert.assertTrue(f.getEnd() >= start && f.getStart() <= stop);
                count++;
            }
            Assert.assertEquals(count, expectedNumRecords);
        } finally {
            // Release the iterator, matching what testLargeNumberOfQueries does for each query.
            iter.close();
        }
    }

    /**
     * Checks that the reader reports exactly the sequences chr1 and chr2.
     */
    @Test(dataProvider = "indexProvider")
    public void testBedNames(final File featureFile, IndexFactory.IndexType indexType, FeatureCodec<Feature> codec) throws IOException {
        final AbstractFeatureReader<Feature> reader = getReader(featureFile, indexType, codec);
        try {
            String[] expectedSequences = {"chr1", "chr2"};

            List<String> seqNames = reader.getSequenceNames();
            Assert.assertEquals(seqNames.size(), expectedSequences.length,
                    "Expected sequences " + ParsingUtils.join(",", expectedSequences) + " but saw " + ParsingUtils.join(",", seqNames));

            for (String s : expectedSequences) {
                Assert.assertTrue(seqNames.contains(s));
            }
        } finally {
            // The reader was previously never closed in this test.
            reader.close();
        }
    }

    /**
     * Opens a reader for {@code featureFile}. For index types that report {@code canCreate()},
     * any existing index file is deleted and a fresh one is built and written first; otherwise
     * a pre-existing index is relied upon.
     *
     * @param featureFile file to read
     * @param indexType   kind of index to (re)build
     * @param codec       codec used to decode {@code featureFile}
     * @return a reader over {@code featureFile}
     * @throws IOException if index creation or opening the reader fails
     */
    private AbstractFeatureReader<Feature> getReader(final File featureFile,
                                                     IndexFactory.IndexType indexType,
                                                     FeatureCodec<Feature> codec)
            throws IOException {
        if (indexType.canCreate()) {
            // for types we can create make a new index each time
            File idxFile = Tribble.indexFile(featureFile);

            // delete an already existing index
            if (idxFile.exists()) {
                idxFile.delete();
            }
            Index idx = IndexFactory.createIndex(featureFile, codec, indexType);
            IndexFactory.writeIndex(idx, idxFile);

            idxFile.deleteOnExit();
        } // else let's just hope the index exists, and if so use it

        return AbstractFeatureReader.getFeatureReader(featureFile.getAbsolutePath(), codec);
    }

    /**
     * Reads a block whose size is twice {@code Integer.MAX_VALUE} through a
     * {@code BlockStreamWrapper} and checks that the total bytes read equals the block size.
     * The data source is {@code /dev/zero}, so this test can only run where that device exists
     * (Unix-like systems).
     */
    @Test()
    public void testReadingBeyondIntSizedBlock() throws IOException {
        final Block block = new Block(0, ((long) Integer.MAX_VALUE) * 2);
        final SeekableFileStream stream = new SeekableFileStream(new File("/dev/zero"));
        try {
            final TribbleIndexedFeatureReader.BlockStreamWrapper blockStreamWrapper = new TribbleIndexedFeatureReader.BlockStreamWrapper(stream, block);
            final int chunkSize = 100000; // 100,000 bytes (~100 KB) per read
            final int chunksToRead = (int) Math.ceil(block.getSize() / (chunkSize * 1.0));

            final byte[] bytes = new byte[chunkSize];
            long totalRead = 0;
            for (int chunk = 0; chunk < chunksToRead; chunk++) {
                //System.out.println("Reading chunk " + chunk + " of " + chunkSize + " total read " + totalRead);
                final int nRead = blockStreamWrapper.read(bytes);
                Assert.assertTrue(nRead != -1, "Prematurely got EOF after " + totalRead + " bytes");
                totalRead += nRead;
            }

            Assert.assertEquals(totalRead, block.getSize(), "Failed to read all bytes from a block with size > 2B = " + block.getSize());
        } finally {
            // The underlying stream was previously never closed.
            stream.close();
        }
    }
}

Loading

0 comments on commit e8c848f

Please sign in to comment.