Skip to content

Commit

Permalink
Add full-sized B37 and HG38 references to our large test data (broadi…
Browse files Browse the repository at this point in the history
  • Loading branch information
droazen authored and EdwardDixon committed Nov 9, 2018
1 parent 3a7a3ea commit c56470f
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/test/java/org/broadinstitute/hellbender/GATKBaseTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ public abstract class GATKBaseTest extends BaseTest {
*/
public static final String largeFileTestDir = new File(publicTestDir, "large").getAbsolutePath() + "/";

/**
 * Path to the complete B37 human reference, including the Epstein-Barr contig, in fasta.gz format.
 * Built by appending the file name to {@link #largeFileTestDir}, which already ends in "/".
 *
 * Source: /seq/references/Homo_sapiens_assembly19/v1/ in the Broad Institute filesystem.
 */
public static final String b37Reference = largeFileTestDir + "Homo_sapiens_assembly19.fasta.gz";

/**
 * Path to the complete HG38 human reference, in fasta.gz format.
 * Built by appending the file name to {@link #largeFileTestDir}, which already ends in "/".
 *
 * Source: /seq/references/Homo_sapiens_assembly38/v0/ in the Broad Institute filesystem.
 */
public static final String hg38Reference = largeFileTestDir + "Homo_sapiens_assembly38.fasta.gz";

/**
 * Path to an uncompressed fasta containing all of chromosomes 20 and 21 from the b37 reference.
 */
public static final String b37_reference_20_21 = largeFileTestDir + "human_g1k_v37.20.21.fasta";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,28 @@ public void testQueryAndIterate(final Path testReference, final SimpleInterval i
}
}
}

/**
 * Test that we can successfully load and query our full-sized B37 reference.
 *
 * Opens the reference at {@code b37Reference}, checks that its sequence dictionary lists
 * 85 contigs, and checks the six bases returned for the interval 1:10000000-10000005.
 */
@Test
public void testLoadAndQueryB37Reference() {
    try (final ReferenceDataSource ref = new ReferenceFileSource(IOUtils.getPath(b37Reference))) {
        Assert.assertEquals(ref.getSequenceDictionary().getSequences().size(), 85, "Wrong number of contigs in reference sequence dictionary");

        // Compare the arrays directly instead of wrapping Arrays.equals() in assertTrue(),
        // so that a failure reports the mismatching bases rather than just "expected true".
        final byte[] expectedBases = { 'A', 'A', 'C', 'C', 'C', 'C' };
        Assert.assertEquals(ref.queryAndPrefetch("1", 10000000, 10000005).getBases(), expectedBases, "Wrong reference bases returned for query on 1:10000000-10000005");
    }
}

/**
 * Test that we can successfully load and query our full-sized HG38 reference.
 *
 * Opens the reference at {@code hg38Reference}, checks that its sequence dictionary lists
 * 3366 contigs, and checks the six bases returned for the interval chr1:10000000-10000005.
 */
@Test
public void testLoadAndQueryHG38Reference() {
    try (final ReferenceDataSource ref = new ReferenceFileSource(IOUtils.getPath(hg38Reference))) {
        Assert.assertEquals(ref.getSequenceDictionary().getSequences().size(), 3366, "Wrong number of contigs in reference sequence dictionary");

        // Compare the arrays directly instead of wrapping Arrays.equals() in assertTrue(),
        // so that a failure reports the mismatching bases rather than just "expected true".
        final byte[] expectedBases = { 'C', 'A', 'G', 'G', 'T', 'G' };
        Assert.assertEquals(ref.queryAndPrefetch("chr1", 10000000, 10000005).getBases(), expectedBases, "Wrong reference bases returned for query on chr1:10000000-10000005");
    }
}
}
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly19.dict
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly19.fasta.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly19.fasta.gz.fai
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly19.fasta.gz.gzi
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly38.dict
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly38.fasta.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly38.fasta.gz.fai
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/Homo_sapiens_assembly38.fasta.gz.gzi
Git LFS file not shown

0 comments on commit c56470f

Please sign in to comment.