Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a regression where VcfWriter would not write a regular file index #816

Merged
merged 3 commits into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions src/main/scala/com/fulcrumgenomics/vcf/api/VcfWriter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import htsjdk.samtools.Defaults
import htsjdk.variant.variantcontext.writer.{Options, VariantContextWriter, VariantContextWriterBuilder}

import java.nio.file.Files
import java.nio.file.LinkOption.NOFOLLOW_LINKS
import java.nio.file.attribute.BasicFileAttributes

/**
* Writes [[Variant]]s to a file or other storage mechanism.
Expand All @@ -49,33 +49,42 @@ object VcfWriter {
var DefaultUseAsyncIo: Boolean = Defaults.USE_ASYNC_IO_WRITE_FOR_TRIBBLE

/**
* Creates a [[VcfWriter]] that will write to the give path. The path must end in either
* - `.vcf` to create an uncompressed VCF file
* - `.vcf.gz` to create a block-gzipped VCF file
* - `.bcf` to create a binary BCF file
* Creates a [[VcfWriter]] that will write to the given path. If the path is meant to point to a regular file, then
* the path must end in either:
*
* - `.vcf`: to create an uncompressed VCF file
* - `.vcf.gz`: to create a block-gzipped VCF file
* - `.bcf`: to create a binary BCF file
*
* If the path is meant to point to a regular file, then indexing will occur automatically. However, if the path
* already exists and the path is not a file or symbolic link, then this function will assume the path is a named
* pipe or device (such as `/dev/null`) and indexing will not occur.
*
* @param path the path to write to
* @param header the header of the VCF
* @return a VariantWriter to write to the given path
* @return a VCF writer to write to the given path
*/
def apply(path: PathToVcf, header: VcfHeader, async: Boolean = DefaultUseAsyncIo): VcfWriter = {
import com.fulcrumgenomics.fasta.Converters.ToSAMSequenceDictionary
val javaHeader = VcfConversions.toJavaHeader(header)
require(!Files.isDirectory(path), s"Path cannot be a directory! Found $path")

val builder = new VariantContextWriterBuilder()
.setOutputPath(path)
.setReferenceDictionary(header.dict.asSam)
.setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
.setBuffer(Io.bufferSize)

if (Files.isRegularFile(path, NOFOLLOW_LINKS)) {
builder.setOption(Options.INDEX_ON_THE_FLY)
} else {
if (async) builder.setOption(Options.USE_ASYNC_IO) else builder.unsetOption(Options.USE_ASYNC_IO)

// If the path exists and is not a file or symbolic link, then assume it is a named pipe and do not index.
if (Files.exists(path) && Files.readAttributes(path, classOf[BasicFileAttributes]).isOther) {
builder.unsetOption(Options.INDEX_ON_THE_FLY)
builder.setIndexCreator(null)
} else {
builder.setOption(Options.INDEX_ON_THE_FLY)
}

if (async) builder.setOption(Options.USE_ASYNC_IO) else builder.unsetOption(Options.USE_ASYNC_IO)
val writer = builder.build()
writer.writeHeader(javaHeader)
new VcfWriter(writer, header)
Expand Down
67 changes: 58 additions & 9 deletions src/test/scala/com/fulcrumgenomics/vcf/api/VcfIoTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@

package com.fulcrumgenomics.vcf.api

import com.fulcrumgenomics.commons.io.Io
import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder}
import com.fulcrumgenomics.commons.CommonsDef.SafelyClosable
import com.fulcrumgenomics.commons.io.{Io, PathUtil}
import com.fulcrumgenomics.testing.VcfBuilder.Gt
import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder}
import com.fulcrumgenomics.vcf.api.Allele.NoCallAllele
import htsjdk.samtools.util.FileExtensions.{BCF => BcfExtension, COMPRESSED_VCF => VcfGzExtension, CSI => CsiExtensions, TABIX_INDEX => TbiExtension, TRIBBLE_INDEX => IdxExtension, VCF => VcfExtension}
import org.scalatest.OptionValues

import java.nio.file.Files
import scala.collection.compat._
import scala.collection.immutable.ListMap

Expand Down Expand Up @@ -67,7 +70,7 @@ class VcfIoTest extends UnitSpec with OptionValues {

/** Writes out the variants to a file and reads them back, returning the header and the variants. */
@inline private def roundtrip(variants: IterableOnce[Variant], header: VcfHeader = VcfIoTest.Header): Result = {
val vcf = makeTempFile("test.", ".vcf")
val vcf = makeTempFile(getClass.getSimpleName, VcfExtension)
val out = VcfWriter(vcf, header)
out ++= variants
out.close()
Expand Down Expand Up @@ -167,7 +170,7 @@ class VcfIoTest extends UnitSpec with OptionValues {
val variants = Range.inclusive(1000, 2000, step=10).map { s =>
Variant(chrom="chr1", pos=s, alleles=alleles, genotypes=Map("s1" -> Genotype(alleles, "s1", alleles.alts)))
}
val vcf = makeTempFile("queryable.", ".vcf.gz")
val vcf = makeTempFile("queryable.", VcfGzExtension)
val out = VcfWriter(vcf, VcfIoTest.Header)
out ++= variants
out.close()
Expand Down Expand Up @@ -227,12 +230,58 @@ class VcfIoTest extends UnitSpec with OptionValues {
v.gt("0").callIndices.mkString("/") shouldBe "2/3"
}

it should "not attempt to index a VCF when streaming to a file handle or other kind of non-regular file" in {
it should "not allow a VCF writer to write to a directory path" in {
val samples = Seq("sample")
val builder = VcfBuilder(samples=samples)
val writer = VcfWriter(Io.DevNull, header = builder.header)
builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample="sample", gt="0/1")))
noException shouldBe thrownBy { writer.write(builder.toSeq) }
val builder = VcfBuilder(samples = samples)
val output = Io.makeTempDir(getClass.getSimpleName)
output.toFile.deleteOnExit()
an[IllegalArgumentException] shouldBe thrownBy { VcfWriter(output, header = builder.header) }
}

it should "write a sibling index when writing to a plaintext VCF file" in {
val samples = Seq("sample")
val builder = VcfBuilder(samples = samples)
val output = makeTempFile(getClass.getSimpleName, VcfExtension)
val writer = VcfWriter(output, header = builder.header)
builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1")))
writer.close()
val source = VcfSource(output)
source.isQueryable shouldBe true
source.safelyClose()
}

it should "write a sibling index when writing to a compressed VCF file" in {
val samples = Seq("sample")
val builder = VcfBuilder(samples = samples)
val output = makeTempFile(getClass.getSimpleName, VcfGzExtension)
val writer = VcfWriter(output, header = builder.header)
builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1")))
writer.close()
val source = VcfSource(output)
source.isQueryable shouldBe true
source.safelyClose()
}

it should "write a sibling index when writing to a BCF file" in {
val samples = Seq("sample")
val builder = VcfBuilder(samples = samples)
val output = makeTempFile(getClass.getSimpleName, BcfExtension)
val writer = VcfWriter(output, header = builder.header)
builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1")))
writer.close()
val source = VcfSource(output)
source.isQueryable shouldBe true
source.safelyClose()
}

it should "not attempt to write a sibling index when streaming to a named pipe like '/dev/null'" in {
val samples = Seq("sample")
val builder = VcfBuilder(samples = samples)
val writer = VcfWriter(Io.DevNull, header = builder.header)
builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1")))
noException shouldBe thrownBy { writer.write(builder.toSeq); writer.close() }
Files.exists(PathUtil.pathTo(Io.DevNull.getFileName.toString + CsiExtensions)) shouldBe false
Files.exists(PathUtil.pathTo(Io.DevNull.getFileName.toString + IdxExtension)) shouldBe false
Files.exists(PathUtil.pathTo(Io.DevNull.getFileName.toString + TbiExtension)) shouldBe false
}
}