diff --git a/src/main/scala/com/fulcrumgenomics/vcf/api/VcfWriter.scala b/src/main/scala/com/fulcrumgenomics/vcf/api/VcfWriter.scala index 47016c221..afc9b0087 100644 --- a/src/main/scala/com/fulcrumgenomics/vcf/api/VcfWriter.scala +++ b/src/main/scala/com/fulcrumgenomics/vcf/api/VcfWriter.scala @@ -31,7 +31,7 @@ import htsjdk.samtools.Defaults import htsjdk.variant.variantcontext.writer.{Options, VariantContextWriter, VariantContextWriterBuilder} import java.nio.file.Files -import java.nio.file.LinkOption.NOFOLLOW_LINKS +import java.nio.file.attribute.BasicFileAttributes /** * Writes [[Variant]]s to a file or other storage mechanism. @@ -49,18 +49,27 @@ object VcfWriter { var DefaultUseAsyncIo: Boolean = Defaults.USE_ASYNC_IO_WRITE_FOR_TRIBBLE /** - * Creates a [[VcfWriter]] that will write to the give path. The path must end in either - * - `.vcf` to create an uncompressed VCF file - * - `.vcf.gz` to create a block-gzipped VCF file - * - `.bcf` to create a binary BCF file + * Creates a [[VcfWriter]] that will write to the given path. If the path is meant to point to a regular file, then + * the path must end in either: + * + * - `.vcf`: to create an uncompressed VCF file + * - `.vcf.gz`: to create a block-gzipped VCF file + * - `.bcf`: to create a binary BCF file + * + * If the path is meant to point to a regular file, then indexing will occur automatically. However, if the path + * already exists and is neither a regular file, a directory, nor a symbolic link (for example a named pipe, or a + * device such as `/dev/null`), then indexing will not occur. 
* * @param path the path to write to * @param header the header of the VCF - * @return a VariantWriter to write to the given path + * @param async whether to enable asynchronous I/O (`Options.USE_ASYNC_IO`) on the underlying writer + * @return a VCF writer to write to the given path + * @throws IllegalArgumentException if the path is an existing directory */ def apply(path: PathToVcf, header: VcfHeader, async: Boolean = DefaultUseAsyncIo): VcfWriter = { import com.fulcrumgenomics.fasta.Converters.ToSAMSequenceDictionary val javaHeader = VcfConversions.toJavaHeader(header) + require(!Files.isDirectory(path), s"Output path cannot be a directory: $path") val builder = new VariantContextWriterBuilder() .setOutputPath(path) @@ -68,14 +77,16 @@ object VcfWriter { .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) .setBuffer(Io.bufferSize) - if (Files.isRegularFile(path, NOFOLLOW_LINKS)) { - builder.setOption(Options.INDEX_ON_THE_FLY) - } else { + if (async) builder.setOption(Options.USE_ASYNC_IO) else builder.unsetOption(Options.USE_ASYNC_IO) + + // If the path exists and is a special file (e.g. a named pipe or a device like `/dev/null`), do not index. + if (Files.exists(path) && Files.readAttributes(path, classOf[BasicFileAttributes]).isOther) { builder.unsetOption(Options.INDEX_ON_THE_FLY) builder.setIndexCreator(null) + } else { + builder.setOption(Options.INDEX_ON_THE_FLY) } - if (async) builder.setOption(Options.USE_ASYNC_IO) else builder.unsetOption(Options.USE_ASYNC_IO) val writer = builder.build() writer.writeHeader(javaHeader) new VcfWriter(writer, header) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/api/VcfIoTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/api/VcfIoTest.scala index 2f8cbf946..70087647f 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/api/VcfIoTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/api/VcfIoTest.scala @@ -24,10 +24,11 @@ package com.fulcrumgenomics.vcf.api -import com.fulcrumgenomics.commons.io.Io -import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder} +import com.fulcrumgenomics.commons.io.{Io, PathUtil} import com.fulcrumgenomics.testing.VcfBuilder.Gt +import 
com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder} import com.fulcrumgenomics.vcf.api.Allele.NoCallAllele +import htsjdk.samtools.util.FileExtensions.{BCF => BcfExtension, COMPRESSED_VCF => VcfGzExtension, TABIX_INDEX => TbiExtension, TRIBBLE_INDEX => IdxExtension, VCF => VcfExtension} import org.scalatest.OptionValues import scala.collection.compat._ @@ -67,7 +68,7 @@ class VcfIoTest extends UnitSpec with OptionValues { /** Writes out the variants to a file and reads them back, returning the header and the variants. */ @inline private def roundtrip(variants: IterableOnce[Variant], header: VcfHeader = VcfIoTest.Header): Result = { - val vcf = makeTempFile("test.", ".vcf") + val vcf = makeTempFile(getClass.getSimpleName, VcfExtension) val out = VcfWriter(vcf, header) out ++= variants out.close() @@ -167,7 +168,7 @@ class VcfIoTest extends UnitSpec with OptionValues { val variants = Range.inclusive(1000, 2000, step=10).map { s => Variant(chrom="chr1", pos=s, alleles=alleles, genotypes=Map("s1" -> Genotype(alleles, "s1", alleles.alts))) } - val vcf = makeTempFile("queryable.", ".vcf.gz") + val vcf = makeTempFile("queryable.", VcfGzExtension) val out = VcfWriter(vcf, VcfIoTest.Header) out ++= variants out.close() @@ -227,12 +228,52 @@ class VcfIoTest extends UnitSpec with OptionValues { v.gt("0").callIndices.mkString("/") shouldBe "2/3" } - it should "not attempt to index a VCF when streaming to a file handle or other kind of non-regular file" in { + it should "not allow a VCF writer to write to a directory path" in { val samples = Seq("sample") - val builder = VcfBuilder(samples=samples) - val writer = VcfWriter(Io.DevNull, header = builder.header) - builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample="sample", gt="0/1"))) - noException shouldBe thrownBy { writer.write(builder.toSeq) } + val builder = VcfBuilder(samples = samples) + val output = Io.makeTempDir(getClass.getSimpleName) + output.toFile.deleteOnExit() + 
an[IllegalArgumentException] shouldBe thrownBy { VcfWriter(output, header = builder.header) } + } + + it should "write a sibling index when writing to a plaintext VCF file" in { + val samples = Seq("sample") + val builder = VcfBuilder(samples = samples) + val output = makeTempFile(getClass.getSimpleName, VcfExtension) + val writer = VcfWriter(output, header = builder.header) + builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1"))) writer.close() + PathUtil.replaceExtension(output, VcfExtension + IdxExtension).toFile shouldBe readable + } + + it should "write a sibling index when writing to a compressed VCF file" in { + val samples = Seq("sample") + val builder = VcfBuilder(samples = samples) + val output = makeTempFile(getClass.getSimpleName, VcfGzExtension) + val writer = VcfWriter(output, header = builder.header) + builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1"))) + writer.close() + PathUtil.replaceExtension(PathUtil.removeExtension(output), VcfGzExtension + TbiExtension).toFile shouldBe readable + } + + it should "write a sibling index when writing to a BCF file" in { + val samples = Seq("sample") + val builder = VcfBuilder(samples = samples) + val output = makeTempFile(getClass.getSimpleName, BcfExtension) + val writer = VcfWriter(output, header = builder.header) + builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1"))) + writer.close() + PathUtil.replaceExtension(output, BcfExtension + IdxExtension).toFile shouldBe readable + } + + it should "not attempt to write a sibling index when streaming to a named pipe like '/dev/null'" in { + val samples = Seq("sample") + val builder = VcfBuilder(samples = samples) + val writer = VcfWriter(Io.DevNull, header = builder.header) + builder.add(chrom = "chr1", pos = 100, alleles = Seq("A", "C"), gts = Seq(Gt(sample = "sample", gt = "0/1"))) + noException 
shouldBe thrownBy { writer.write(builder.toSeq); writer.close() } + // Use the full path so the check targets the true sibling next to the output, not a bare relative file name. + PathUtil.pathTo(Io.DevNull.toString + IdxExtension).toFile should not be readable + PathUtil.pathTo(Io.DevNull.toString + TbiExtension).toFile should not be readable } }