From fd1e170465bdc55fe9e1783ce4a838ea11739a4d Mon Sep 17 00:00:00 2001 From: nalinigans Date: Mon, 16 Jul 2018 09:18:30 -0700 Subject: [PATCH 01/12] Allow for hdfs and gcs URI's to be passed to GenomicsDB --- build.gradle | 2 +- .../hellbender/engine/FeatureDataSource.java | 53 +++++++------- .../tools/genomicsdb/GenomicsDBImport.java | 69 +++++++++---------- 3 files changed, 57 insertions(+), 67 deletions(-) diff --git a/build.gradle b/build.gradle index a94bffa782b..d25eb0e2d8f 100644 --- a/build.gradle +++ b/build.gradle @@ -64,7 +64,7 @@ final sparkVersion = System.getProperty('spark.version', '2.2.0') final hadoopVersion = System.getProperty('hadoop.version', '2.8.2') final hadoopBamVersion = System.getProperty('hadoopBam.version','7.10.0') final tensorflowVersion = System.getProperty('tensorflow.version','1.4.0') -final genomicsdbVersion = System.getProperty('genomicsdb.version','0.9.2-proto-3.0.0-beta-1+b825ffa6eb47a') +final genomicsdbVersion = System.getProperty('genomicsdb.version','0.9.2-proto-3.0.0-beta-1+d37bff4718e5') final testNGVersion = '6.11' // Using the shaded version to avoid conflicts between its protobuf dependency // and that of Hadoop/Spark (either the one we reference explicitly, or the one diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index e84c5358e19..9a43b943db8 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.engine; +import com.intel.genomicsdb.GenomicsDBUtils; import com.intel.genomicsdb.model.GenomicsDBExportConfiguration; import com.intel.genomicsdb.reader.GenomicsDBFeatureReader; import htsjdk.samtools.SAMSequenceDictionary; @@ -24,6 +25,8 @@ import java.io.File; import java.io.IOException; +import java.net.URI; +import 
java.net.URISyntaxException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -370,28 +373,23 @@ private static FeatureReader getGenomicsDBFeatureReader(final St } final String noheader = path.replace(GENOMIC_DB_URI_SCHEME, ""); - final File workspace = new File(noheader); - final File callsetJson = new File(noheader, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - final File vidmapJson = new File(noheader, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - final File vcfHeader = new File(noheader, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); - - if ( ! workspace.exists() || ! workspace.canRead() || ! workspace.isDirectory() ) { - throw new UserException("GenomicsDB workspace " + workspace.getAbsolutePath() + " does not exist, " + - " is not readable, or is not a directory"); - } - try { - IOUtils.canReadFile(callsetJson); - IOUtils.canReadFile(vidmapJson); - IOUtils.canReadFile(vcfHeader); - } catch ( UserException.CouldNotReadInputFile e ) { - throw new UserException("Couldn't connect to GenomicsDB because the vidmap, callset JSON files, or gVCF Header (" + - GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME + "," + GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME + "," + - GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME + ") could not be read from GenomicsDB workspace " + workspace.getAbsolutePath(), e); - } + final URI workspace; + try { + if (noheader.endsWith("/")) { + workspace = new URI(noheader); + } else { + workspace = new URI(noheader + "/"); + } + } catch (URISyntaxException e) { + throw new UserException("GenomicsDB workspace " + path + " is not valid URI"); + } + final URI callsetJson = workspace.resolve(GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + final URI vidmapJson = workspace.resolve(GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + final URI vcfHeader = workspace.resolve(GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); final GenomicsDBExportConfiguration.ExportConfiguration 
exportConfigurationBuilder = - createExportConfiguration(reference, workspace, callsetJson, vidmapJson, vcfHeader); + createExportConfiguration(reference, workspace, callsetJson, vidmapJson, vcfHeader); try { return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new BCF2Codec(), Optional.empty()); @@ -400,21 +398,20 @@ private static FeatureReader getGenomicsDBFeatureReader(final St } } - private static GenomicsDBExportConfiguration.ExportConfiguration createExportConfiguration(final File reference, final File workspace, - final File callsetJson, final File vidmapJson, - final File vcfHeader) { + private static GenomicsDBExportConfiguration.ExportConfiguration createExportConfiguration(final File reference, final URI workspace, + final URI callsetJson, final URI vidmapJson, + final URI vcfHeader) { GenomicsDBExportConfiguration.ExportConfiguration.Builder exportConfigurationBuilder = GenomicsDBExportConfiguration.ExportConfiguration.newBuilder() - .setWorkspace(workspace.getAbsolutePath()) + .setWorkspace(workspace.toString()) .setReferenceGenome(reference.getAbsolutePath()) - .setVidMappingFile(vidmapJson.getAbsolutePath()) - .setCallsetMappingFile(callsetJson.getAbsolutePath()) - .setVcfHeaderFilename(vcfHeader.getAbsolutePath()) + .setVidMappingFile(vidmapJson.toString()) + .setCallsetMappingFile(callsetJson.toString()) + .setVcfHeaderFilename(vcfHeader.toString()) .setProduceGTField(false) .setProduceGTWithMinPLValueForSpanningDeletions(false) .setSitesOnlyQuery(false) .setMaxDiploidAltAllelesThatCanBeGenotyped(GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED); - Path arrayFolder = Paths.get(workspace.getAbsolutePath(), GenomicsDBConstants.DEFAULT_ARRAY_NAME).toAbsolutePath(); // For the multi-interval support, we create multiple arrays (directories) in a single workspace - // one per interval. 
So, if you wish to import intervals ("chr1", [ 1, 100M ]) and ("chr2", [ 1, 100M ]), @@ -429,7 +426,7 @@ private static GenomicsDBExportConfiguration.ExportConfiguration createExportCon // will be backward compatible with respect to reads. Hence, if a directory named genomicsdb_array is found, // the array name is passed to the GenomicsDBFeatureReader otherwise the array names are generated from the // directory entries. - if (Files.exists(arrayFolder)) { + if (GenomicsDBUtils.isGenomicsDBArray(workspace.toString(), GenomicsDBConstants.DEFAULT_ARRAY_NAME)) { exportConfigurationBuilder.setArrayName(GenomicsDBConstants.DEFAULT_ARRAY_NAME); } else { exportConfigurationBuilder.setGenerateArrayNameFromPartitionBounds(true); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java index 3e0c8586f47..3692d26f4db 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java @@ -1,6 +1,7 @@ package org.broadinstitute.hellbender.tools.genomicsdb; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import com.intel.genomicsdb.GenomicsDBUtils; import com.intel.genomicsdb.importer.GenomicsDBImporter; import com.intel.genomicsdb.importer.model.ChromosomeInterval; import com.intel.genomicsdb.model.Coordinates; @@ -37,6 +38,8 @@ import java.io.File; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -290,13 +293,13 @@ public int getDefaultCloudIndexPrefetchBufferSize() { private SAMSequenceDictionary mergedHeaderSequenceDictionary; // Path to vidmap file to be written by GenomicsDBImporter - private File vidMapJSONFile; + private URI vidMapJSONFile; // Path to callsetmap file to be written 
by GenomicsDBImporter - private File callsetMapJSONFile; + private URI callsetMapJSONFile; // Path to combined VCF header file to be written by GenomicsDBImporter - private File vcfHeaderFile; + private URI vcfHeaderFile; // GenomicsDB callset map protobuf structure containing all callset names // used to write the callset json file on traversal success @@ -458,11 +461,11 @@ public static SortedMap loadSampleNameMapFileInSortedOrder(final P @Override public void onTraversalStart() { - final File workspaceDir = overwriteOrCreateWorkspace(); + final URI workspaceDir = overwriteOrCreateWorkspace(); - vidMapJSONFile = new File(workspaceDir + "/" + GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - callsetMapJSONFile = new File(workspaceDir + "/" + GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - vcfHeaderFile = new File(workspaceDir + "/" + GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); + vidMapJSONFile = workspaceDir.resolve(GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + callsetMapJSONFile = workspaceDir.resolve(GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + vcfHeaderFile = workspaceDir.resolve(GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); logger.info("Vid Map JSON file will be written to " + vidMapJSONFile); logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile); @@ -529,9 +532,9 @@ private ImportConfig createImportConfig(final int batchSize) { importConfigurationBuilder.setConsolidateTiledbArrayAfterLoad(doConsolidation); ImportConfig importConfig = new ImportConfig(importConfigurationBuilder.build(), validateSampleToReaderMap, true, batchSize, mergedHeaderLines, sampleNameToVcfPath, this::createSampleToReaderMap); - importConfig.setOutputCallsetmapJsonFile(callsetMapJSONFile.getAbsolutePath()); - importConfig.setOutputVidmapJsonFile(vidMapJSONFile.getAbsolutePath()); - importConfig.setOutputVcfHeaderFile(vcfHeaderFile.getAbsolutePath()); + importConfig.setOutputCallsetmapJsonFile(callsetMapJSONFile.toString()); + 
importConfig.setOutputVidmapJsonFile(vidMapJSONFile.toString()); + importConfig.setOutputVcfHeaderFile(vcfHeaderFile.toString()); importConfig.setUseSamplesInOrder(true); importConfig.setFunctionToCallOnBatchCompletion(this::logMessageOnBatchCompletion); return importConfig; @@ -649,27 +652,24 @@ private AbstractFeatureReader getReaderFromPath(fi * * @return The workspace directory */ - private File overwriteOrCreateWorkspace() { - final File workspaceDir = new File(workspace); - - if (overwriteExistingWorkspace) { - IOUtils.tryDelete(workspaceDir); - } - - if (!workspaceDir.exists()) { - final int ret = GenomicsDBImporter.createTileDBWorkspace(workspaceDir.getAbsolutePath()); - if (ret > 0) { - checkIfValidWorkspace(workspaceDir); - logger.info("Importing data to GenomicsDB workspace: " + workspaceDir); - } else if (ret < 0) { - throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspaceDir); - } - return workspaceDir; - } else { - throw new UnableToCreateGenomicsDBWorkspace("The workspace you're trying to create already exists. ( " + workspaceDir.getAbsolutePath() + " ) " + - "Writing into an existing workspace can cause data corruption. " + - "Please choose an output path that doesn't already exist. 
"); - } + private URI overwriteOrCreateWorkspace() { + URI workspaceURI; + + try { + if (workspace.endsWith("/")) { + workspaceURI = new URI(workspace); + } else { + workspaceURI = new URI(workspace + "/"); + } + } catch (URISyntaxException e) { + throw new UnableToCreateGenomicsDBWorkspace("Specified workspace " + workspace + " is not valid URI"); + } + + if (GenomicsDBUtils.createTileDBWorkspace(workspaceURI.toString(), overwriteExistingWorkspace) < 0) { + throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspace); + } + + return workspaceURI; } static class UnableToCreateGenomicsDBWorkspace extends UserException { @@ -680,13 +680,6 @@ static class UnableToCreateGenomicsDBWorkspace extends UserException { } } - private static void checkIfValidWorkspace(final File workspaceDir) { - final File tempFile = new File(workspaceDir.getAbsolutePath() + "/__tiledb_workspace.tdb"); - if (!tempFile.exists()) { - throw new UserException(workspaceDir.getAbsolutePath() + " is not a valid GenomicsDB workspace"); - } - } - /** * Loads our intervals using the best available sequence * dictionary (as returned by {@link #getBestAvailableSequenceDictionary}) From 61cd1d9c806b0a8397da39cacfaa67421c063992 Mon Sep 17 00:00:00 2001 From: nalinigans Date: Tue, 17 Jul 2018 10:35:47 -0700 Subject: [PATCH 02/12] Push the URI processing for GenomicsDB to BucketUtils --- .../hellbender/engine/FeatureDataSource.java | 35 +++++---------- .../tools/genomicsdb/GenomicsDBImport.java | 44 +++++++------------ .../hellbender/utils/gcs/BucketUtils.java | 14 ++++++ .../hellbender/utils/gcs/BucketUtilsTest.java | 9 ++++ 4 files changed, 49 insertions(+), 53 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index 9a43b943db8..c4184166211 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ 
b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -25,8 +25,6 @@ import java.io.File; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -372,21 +370,10 @@ private static FeatureReader getGenomicsDBFeatureReader(final St throw new IllegalArgumentException("Trying to create a GenomicsDBReader from a non-GenomicsDB input"); } - final String noheader = path.replace(GENOMIC_DB_URI_SCHEME, ""); - - final URI workspace; - try { - if (noheader.endsWith("/")) { - workspace = new URI(noheader); - } else { - workspace = new URI(noheader + "/"); - } - } catch (URISyntaxException e) { - throw new UserException("GenomicsDB workspace " + path + " is not valid URI"); - } - final URI callsetJson = workspace.resolve(GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - final URI vidmapJson = workspace.resolve(GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - final URI vcfHeader = workspace.resolve(GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); + final String workspace = path.replace(GENOMIC_DB_URI_SCHEME, ""); + final String callsetJson = BucketUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + final String vidmapJson = BucketUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + final String vcfHeader = BucketUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder = createExportConfiguration(reference, workspace, callsetJson, vidmapJson, vcfHeader); @@ -398,16 +385,16 @@ private static FeatureReader getGenomicsDBFeatureReader(final St } } - private static GenomicsDBExportConfiguration.ExportConfiguration createExportConfiguration(final File reference, final URI workspace, - final URI callsetJson, final URI vidmapJson, - final URI vcfHeader) { + 
private static GenomicsDBExportConfiguration.ExportConfiguration createExportConfiguration(final File reference, final String workspace, + final String callsetJson, final String vidmapJson, + final String vcfHeader) { GenomicsDBExportConfiguration.ExportConfiguration.Builder exportConfigurationBuilder = GenomicsDBExportConfiguration.ExportConfiguration.newBuilder() - .setWorkspace(workspace.toString()) + .setWorkspace(workspace) .setReferenceGenome(reference.getAbsolutePath()) - .setVidMappingFile(vidmapJson.toString()) - .setCallsetMappingFile(callsetJson.toString()) - .setVcfHeaderFilename(vcfHeader.toString()) + .setVidMappingFile(vidmapJson) + .setCallsetMappingFile(callsetJson) + .setVcfHeaderFilename(vcfHeader) .setProduceGTField(false) .setProduceGTWithMinPLValueForSpanningDeletions(false) .setSitesOnlyQuery(false) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java index 3692d26f4db..626834d1a49 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java @@ -33,13 +33,12 @@ import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.nio.SeekableByteChannelPrefetcher; import java.io.File; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -293,13 +292,13 @@ public int getDefaultCloudIndexPrefetchBufferSize() { private SAMSequenceDictionary mergedHeaderSequenceDictionary; // Path to vidmap file to be 
written by GenomicsDBImporter - private URI vidMapJSONFile; + private String vidMapJSONFile; // Path to callsetmap file to be written by GenomicsDBImporter - private URI callsetMapJSONFile; + private String callsetMapJSONFile; // Path to combined VCF header file to be written by GenomicsDBImporter - private URI vcfHeaderFile; + private String vcfHeaderFile; // GenomicsDB callset map protobuf structure containing all callset names // used to write the callset json file on traversal success @@ -461,16 +460,15 @@ public static SortedMap loadSampleNameMapFileInSortedOrder(final P @Override public void onTraversalStart() { - final URI workspaceDir = overwriteOrCreateWorkspace(); + String workspaceDir = overwriteOrCreateWorkspace(); - vidMapJSONFile = workspaceDir.resolve(GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - callsetMapJSONFile = workspaceDir.resolve(GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - vcfHeaderFile = workspaceDir.resolve(GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); + vidMapJSONFile = BucketUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + callsetMapJSONFile = BucketUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + vcfHeaderFile = BucketUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); logger.info("Vid Map JSON file will be written to " + vidMapJSONFile); logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile); logger.info("Complete VCF Header will be written to " + vcfHeaderFile); - logger.info("Importing to array - " + workspace + "/" + GenomicsDBConstants.DEFAULT_ARRAY_NAME); initializeInputPreloadExecutorService(); } @@ -532,9 +530,9 @@ private ImportConfig createImportConfig(final int batchSize) { importConfigurationBuilder.setConsolidateTiledbArrayAfterLoad(doConsolidation); ImportConfig importConfig = new ImportConfig(importConfigurationBuilder.build(), validateSampleToReaderMap, true, batchSize, 
mergedHeaderLines, sampleNameToVcfPath, this::createSampleToReaderMap); - importConfig.setOutputCallsetmapJsonFile(callsetMapJSONFile.toString()); - importConfig.setOutputVidmapJsonFile(vidMapJSONFile.toString()); - importConfig.setOutputVcfHeaderFile(vcfHeaderFile.toString()); + importConfig.setOutputCallsetmapJsonFile(callsetMapJSONFile); + importConfig.setOutputVidmapJsonFile(vidMapJSONFile); + importConfig.setOutputVcfHeaderFile(vcfHeaderFile); importConfig.setUseSamplesInOrder(true); importConfig.setFunctionToCallOnBatchCompletion(this::logMessageOnBatchCompletion); return importConfig; @@ -652,24 +650,12 @@ private AbstractFeatureReader getReaderFromPath(fi * * @return The workspace directory */ - private URI overwriteOrCreateWorkspace() { - URI workspaceURI; - - try { - if (workspace.endsWith("/")) { - workspaceURI = new URI(workspace); - } else { - workspaceURI = new URI(workspace + "/"); - } - } catch (URISyntaxException e) { - throw new UnableToCreateGenomicsDBWorkspace("Specified workspace " + workspace + " is not valid URI"); - } - - if (GenomicsDBUtils.createTileDBWorkspace(workspaceURI.toString(), overwriteExistingWorkspace) < 0) { + private String overwriteOrCreateWorkspace() { + String workspaceDir = BucketUtils.makeFilePathAbsolute(workspace); + if (GenomicsDBUtils.createTileDBWorkspace(workspaceDir, overwriteExistingWorkspace) < 0) { throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspace); } - - return workspaceURI; + return workspaceDir; } static class UnableToCreateGenomicsDBWorkspace extends UserException { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java index bd2fbe5812e..1ff2b63881f 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java @@ -81,6 +81,20 @@ public static String 
makeFilePathAbsolute(String path){ } } + /** + * Appends path to the given dir/folder. java.nio.Path is used to append to dir with a scheme, otherwise java.io.File is used. + * @param dir the folder to append the path to + * @param path the path + * @return the appended path as a String. + */ + public static String appendPathToDir(String dir, String path) { + if (isCloudStorageUrl(dir) || isHadoopUrl(dir) || isFileUrl(dir)){ + return IOUtils.getPath(dir).resolve(path).toUri().toString(); + } else { + return new File(dir, path).getPath(); + } + } + /** * Open a file for reading regardless of whether it's on GCS, HDFS or local disk. * diff --git a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java index 83bf6d25a75..9fa045e0395 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java @@ -34,6 +34,7 @@ public void testIsCloudStorageURL(){ // this does not throw NullPointerException. 
String x = "" + null + "://"; + } @Test @@ -57,6 +58,14 @@ public void testIsFileURL(){ Assert.assertFalse(BucketUtils.isFileUrl("gs://abucket")); } + @Test + public void testAppendPathToDir() { + Assert.assertEquals(BucketUtils.appendPathToDir("dir", "file"), "dir/file"); + Assert.assertEquals(BucketUtils.appendPathToDir("/path/to/dir", "anotherdir/file"), "/path/to/dir/anotherdir/file"); + Assert.assertEquals(BucketUtils.appendPathToDir("hdfs://namenode:9000/dir", "file"), "hdfs://namenode:9000/dir/file"); + Assert.assertEquals(BucketUtils.appendPathToDir("gs://abucket/dir", "file"), "gs://abucket/dir/file"); + } + @Test public void testCopyLocal() throws IOException { final String src = publicTestDir+"empty.vcf"; From 3c87e98056ce5b3ce3ecfd14cb78a6f31f15c873 Mon Sep 17 00:00:00 2001 From: nalinigans Date: Tue, 17 Jul 2018 12:55:26 -0700 Subject: [PATCH 03/12] Refine appendPathToDir --- .../hellbender/utils/gcs/BucketUtils.java | 13 ++++++++++--- .../hellbender/utils/gcs/BucketUtilsTest.java | 5 +++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java index 1ff2b63881f..189aacc7639 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java @@ -82,14 +82,21 @@ public static String makeFilePathAbsolute(String path){ } /** - * Appends path to the given dir/folder. java.nio.Path is used to append to dir with a scheme, otherwise java.io.File is used. + * Appends path to the given dir/folder. * @param dir the folder to append the path to - * @param path the path + * @param path the path relative to dir * @return the appended path as a String. 
*/ public static String appendPathToDir(String dir, String path) { + if (path.startsWith("/")) { // Not a relative path + return path; + } if (isCloudStorageUrl(dir) || isHadoopUrl(dir) || isFileUrl(dir)){ - return IOUtils.getPath(dir).resolve(path).toUri().toString(); + if (dir.endsWith("/")) { + return dir+path; + } else { + return dir+'/'+path; + } } else { return new File(dir, path).getPath(); } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java index 9fa045e0395..42d4197c774 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java @@ -61,7 +61,12 @@ public void testIsFileURL(){ @Test public void testAppendPathToDir() { Assert.assertEquals(BucketUtils.appendPathToDir("dir", "file"), "dir/file"); + Assert.assertEquals(BucketUtils.appendPathToDir("dir/", "file"), "dir/file"); + Assert.assertEquals(BucketUtils.appendPathToDir("dir", "/file"), "/file"); + Assert.assertEquals(BucketUtils.appendPathToDir("dir/", "/file"), "/file"); Assert.assertEquals(BucketUtils.appendPathToDir("/path/to/dir", "anotherdir/file"), "/path/to/dir/anotherdir/file"); + + Assert.assertEquals(BucketUtils.appendPathToDir("file://dir", "file"), "file://dir/file"); Assert.assertEquals(BucketUtils.appendPathToDir("hdfs://namenode:9000/dir", "file"), "hdfs://namenode:9000/dir/file"); Assert.assertEquals(BucketUtils.appendPathToDir("gs://abucket/dir", "file"), "gs://abucket/dir/file"); } From 46ef8aaa88b49941bbd4899d3a7b4c2d6a1a8c72 Mon Sep 17 00:00:00 2001 From: nalinigans Date: Sun, 29 Jul 2018 13:05:11 -0700 Subject: [PATCH 04/12] Move to 0.10.0-proto-3.0.0-beta-1+d392491bafcac337 for GenomicsDB as suggested by @kgururaj in PR 5017 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 80e09807db5..225a3d9732f 
100644 --- a/build.gradle +++ b/build.gradle @@ -64,7 +64,7 @@ final sparkVersion = System.getProperty('spark.version', '2.2.0') final hadoopVersion = System.getProperty('hadoop.version', '2.8.2') final hadoopBamVersion = System.getProperty('hadoopBam.version','7.10.0') final tensorflowVersion = System.getProperty('tensorflow.version','1.4.0') -final genomicsdbVersion = System.getProperty('genomicsdb.version','0.9.2-proto-3.0.0-beta-1+3102eb38d772-retry') +final genomicsdbVersion = System.getProperty('genomicsdb.version','0.10.0-proto-3.0.0-beta-1+d392491bafcac337') final testNGVersion = '6.11' // Using the shaded version to avoid conflicts between its protobuf dependency // and that of Hadoop/Spark (either the one we reference explicitly, or the one From 855cff72e3391722ed65a7861cb87ab7898e6c3a Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Sun, 19 Aug 2018 14:21:42 -0700 Subject: [PATCH 05/12] Incorporate changes suggested by @droazen in PR 5017 --- .../hellbender/engine/FeatureDataSource.java | 68 ++++++++++++++++--- .../hellbender/engine/FeatureInput.java | 3 +- .../tools/genomicsdb/GenomicsDBImport.java | 30 ++++---- .../hellbender/utils/gcs/BucketUtils.java | 21 ------ .../hellbender/utils/io/IOUtils.java | 18 +++++ .../utils/test/GenomicsDBTestUtils.java | 3 +- .../engine/FeatureDataSourceUnitTest.java | 34 ++++++++++ .../hellbender/utils/gcs/BucketUtilsTest.java | 13 ---- .../hellbender/utils/io/IOUtilsUnitTest.java | 19 ++++++ 9 files changed, 150 insertions(+), 59 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index cbb87a4aadb..b43900e6968 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -26,13 +26,13 @@ import java.io.File; import java.io.IOException; import java.nio.channels.SeekableByteChannel; 
-import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Enables traversals and queries over sources of Features, which are metadata associated with a location @@ -64,9 +64,14 @@ public final class FeatureDataSource implements GATKDataSourc private static final Logger logger = LogManager.getLogger(FeatureDataSource.class); /** - * identifies a path as a GenomicsDB URI + * Schemes starting with gendb could be GenomicsDB paths */ - public static final String GENOMIC_DB_URI_SCHEME = "gendb://"; + public static final String GENOMIC_DB_URI_SCHEME = "gendb"; + + /** + * Patterns identifying GenomicsDB paths + */ + private static final Pattern genomicsdb_uri_pattern = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(.?)(.*)(://)(.*)"); /** * Feature reader used to retrieve records from our file @@ -288,9 +293,50 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook * @return true if path represent a GenomicsDB URI, otherwise false */ public static boolean isGenomicsDBPath(final String path) { - return path != null && path.startsWith(GENOMIC_DB_URI_SCHEME); + return getGenomicsDBPath(path) != null; + } + + public static String getGenomicsDBAbsolutePath(final String path) { + String genomicsdbPath = getGenomicsDBPath(path); + if (genomicsdbPath != null) { + return BucketUtils.makeFilePathAbsolute(getGenomicsDBPath(path)); + } else { + return null; + } + } + + /** + * If path is prefaced with gendb:// or gendb.{scheme}://, returns a path acceptable + * by GenomicsDB. Otherwise, returns null. 
+ * + * @param path gendb paths + * following are valid gendb URI examples + * gendb://my_folder + * gendb:///my_abs_folder + * gendb.hdfs://name_node/my_folder + * gendb.gs://my_bucket/my_folder + * gendb.s3://my_bucket/my_folder + * @return GenomicsDB acceptable path or null + */ + public static String getGenomicsDBPath(final String path) { + // genomicsdb_uri_pattern = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(.?)(.*)(://)(.*)"); + // gendb.supportedCloudURI://path + // ^^group2^^ ^^group4^^ + String genomicsdbPath = null; + if (path != null && path.startsWith(GENOMIC_DB_URI_SCHEME)) { // Check if path starts with "gendb" + Matcher matcher = genomicsdb_uri_pattern.matcher(path); + if (matcher.find() && !matcher.group(3).isEmpty()) { // path contains "://" + if (!matcher.group(1).isEmpty() && matcher.group(1).equals(".")) { // path has a period after gendb, so it is a URI + genomicsdbPath = matcher.group(2) + matcher.group(3) + matcher.group(4); + } else if (matcher.group(2).isEmpty()) { + genomicsdbPath = matcher.group(4); + } + } + } + return genomicsdbPath; } + @SuppressWarnings("unchecked") private static FeatureReader getFeatureReader(final FeatureInput featureInput, final Class targetFeatureType, final Function cloudWrapper, @@ -366,14 +412,14 @@ private static FeatureReader getFeatureReader(final Featu } private static FeatureReader getGenomicsDBFeatureReader(final String path, final File reference) { - if( !isGenomicsDBPath(path) ) { - throw new IllegalArgumentException("Trying to create a GenomicsDBReader from a non-GenomicsDB input"); + final String workspace = getGenomicsDBAbsolutePath(path); + if (workspace == null) { + throw new IllegalArgumentException("Trying to create a GenomicsDBReader from non-GenomicsDB input"); } - final String workspace = path.replace(GENOMIC_DB_URI_SCHEME, ""); - final String callsetJson = BucketUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - final String vidmapJson = 
BucketUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - final String vcfHeader = BucketUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); + final String callsetJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + final String vidmapJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + final String vcfHeader = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder = createExportConfiguration(reference, workspace, callsetJson, vidmapJson, vcfHeader); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java index 017da21e7fd..e332c6d05ee 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java @@ -5,6 +5,7 @@ import htsjdk.tribble.FeatureCodec; import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; @@ -242,7 +243,7 @@ public void setFeatureCodecClass(final Class> featureCodecCla */ private static String makeIntoAbsolutePath(final String filePath){ if(FeatureDataSource.isGenomicsDBPath(filePath)){ - return FeatureDataSource.GENOMIC_DB_URI_SCHEME + new File(filePath.replace(FeatureDataSource.GENOMIC_DB_URI_SCHEME,"")).getAbsolutePath(); + return FeatureDataSource.getGenomicsDBAbsolutePath(filePath); } else if (URI.create(filePath).getScheme() != null) { return IOUtils.getPath(filePath).toAbsolutePath().toUri().toString(); } else { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java 
b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java index aef29ad15f7..48cf5e59d63 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java @@ -37,7 +37,6 @@ import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.nio.SeekableByteChannelPrefetcher; -import java.io.File; import java.io.IOException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; @@ -460,12 +459,10 @@ public static SortedMap loadSampleNameMapFileInSortedOrder(final P */ @Override public void onTraversalStart() { - - String workspaceDir = overwriteOrCreateWorkspace(); - - vidMapJSONFile = BucketUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - callsetMapJSONFile = BucketUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - vcfHeaderFile = BucketUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); + String workspaceDir = BucketUtils.makeFilePathAbsolute(overwriteOrCreateWorkspace()); + vidMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + callsetMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + vcfHeaderFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); logger.info("Vid Map JSON file will be written to " + vidMapJSONFile); logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile); @@ -652,11 +649,20 @@ private AbstractFeatureReader getReaderFromPath(fi * @return The workspace directory */ private String overwriteOrCreateWorkspace() { - String workspaceDir = BucketUtils.makeFilePathAbsolute(workspace); - if (GenomicsDBUtils.createTileDBWorkspace(workspaceDir, overwriteExistingWorkspace) < 0) { - throw new 
UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspace); - } - return workspaceDir; + String workspaceDir = BucketUtils.makeFilePathAbsolute(workspace); + // From JavaDoc for GenomicsDBUtils.createTileDBWorkspace + // returnCode = 0 : OK + // returnCode = -1 : path was not a directory + // returnCode = -2 : failed to create workspace + // returnCode = 1 : existing directory, nothing changed + int returnCode = GenomicsDBUtils.createTileDBWorkspace(workspaceDir, overwriteExistingWorkspace); + if (returnCode < 0) { + throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspace); + } else if (!overwriteExistingWorkspace && returnCode == 1) { + throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspace + " already exists"); + } else { + return workspaceDir; + } } static class UnableToCreateGenomicsDBWorkspace extends UserException { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java index 189aacc7639..bd2fbe5812e 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java @@ -81,27 +81,6 @@ public static String makeFilePathAbsolute(String path){ } } - /** - * Appends path to the given dir/folder. - * @param dir the folder to append the path to - * @param path the path relative to dir - * @return the appended path as a String. - */ - public static String appendPathToDir(String dir, String path) { - if (path.startsWith("/")) { // Not a relative path - return path; - } - if (isCloudStorageUrl(dir) || isHadoopUrl(dir) || isFileUrl(dir)){ - if (dir.endsWith("/")) { - return dir+path; - } else { - return dir+'/'+path; - } - } else { - return new File(dir, path).getPath(); - } - } - /** * Open a file for reading regardless of whether it's on GCS, HDFS or local disk. 
* diff --git a/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java index 6052f042ea8..d9c9a97a913 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java @@ -534,6 +534,24 @@ public static Path getPath(String uriString) { } } + /** + * Appends path to the given parent dir. Parent dir could be a URI or a File. + * @param dir the folder to append the path to + * @param path the path relative to dir. + * @return the appended path as a String if path is relative, else path is returned. + */ + public static String appendPathToDir(String dir, String path) { + if (path.startsWith("/")) { // Already an absolute path + return path; + } + if (BucketUtils.isRemoteStorageUrl(dir) || BucketUtils.isFileUrl(dir)) { + Path dirPath = getPath(dir); + return dirPath.resolve(path).toUri().toString(); + } else { + return new File(dir, path).getPath(); + } + } + /** * @param path Path to test * @throws org.broadinstitute.hellbender.exceptions.UserException.CouldNotReadInputFile if the file isn't readable diff --git a/src/main/java/org/broadinstitute/hellbender/utils/test/GenomicsDBTestUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/test/GenomicsDBTestUtils.java index 835d4ab113f..3e7ff585431 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/test/GenomicsDBTestUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/test/GenomicsDBTestUtils.java @@ -11,6 +11,7 @@ public final class GenomicsDBTestUtils { + /** * don't instantiate a utility class */ @@ -21,7 +22,7 @@ private GenomicsDBTestUtils(){} * @return a string formatted as a genomicsDB uri pointing to the given workspace i.e "gendb:///pathTo/workspace */ public static String makeGenomicsDBUri(final File workspace){ - return FeatureDataSource.GENOMIC_DB_URI_SCHEME + workspace.getAbsolutePath(); + return 
FeatureDataSource.GENOMIC_DB_URI_SCHEME + "://" + workspace.getAbsolutePath(); } /** diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java index b55ed87ac2f..9213f6b3348 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java @@ -63,6 +63,40 @@ public void testGetHeader() { Assert.assertTrue(header instanceof VCFHeader, "Header for " + QUERY_TEST_VCF.getAbsolutePath() + " not a VCFHeader"); } + @DataProvider(name = "GenomicsDBTestPathData") + public Object[][] genomicsDBTestPathData() { + return new Object[][]{ + //path, getGenomicsDBPath, isGenomicsDBPath + {null, null, false}, + {"", null, false}, + {"dfdfdf://fdfdf", null, false}, + {"fdfdf", null, false}, + {"gendbdfdfdf://fdfdf", null, false}, + {"gendb-dfdfdf://fdfdf", null, false}, + {"gendb-dfdfdf://", null, false}, + {"gendb", null, false}, + {"gendbdfdf", null, false}, + {"agendb://dfdfd", null, false}, + + {"gendb.dfdfdf://fdfdf", "dfdfdf://fdfdf", true}, + {"gendb://fdfdf", "fdfdf", true}, + {"gendb://", "", true}, + {"gendb:///fdfd", "/fdfd", true}, + {"gendb:///", "/", true}, + {"gendb.hdfs://this-node:9000/dir", "hdfs://this-node:9000/dir", true}, + {"gendb.gs://my-bucket/dir", "gs://my-bucket/dir", true}, + + {"gendb-hdfs://this-node:9000/dir", null, false}, + {"gendb-gs://this-node:9000/dir", null, false} + }; + } + + @Test(dataProvider = "GenomicsDBTestPathData") + public void testGenomicsDBPathParsing(String path, String expectedPath, boolean expectedComparison){ + Assert.assertEquals(FeatureDataSource.getGenomicsDBPath(path), expectedPath); + Assert.assertEquals(FeatureDataSource.isGenomicsDBPath(path), expectedComparison); + } + @Test public void testGetSequenceDictionary() { try (FeatureDataSource featureSource = new 
FeatureDataSource<>(QUERY_TEST_VCF, "CustomName")) { diff --git a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java index 42d4197c774..dc430aa0d56 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java @@ -58,19 +58,6 @@ public void testIsFileURL(){ Assert.assertFalse(BucketUtils.isFileUrl("gs://abucket")); } - @Test - public void testAppendPathToDir() { - Assert.assertEquals(BucketUtils.appendPathToDir("dir", "file"), "dir/file"); - Assert.assertEquals(BucketUtils.appendPathToDir("dir/", "file"), "dir/file"); - Assert.assertEquals(BucketUtils.appendPathToDir("dir", "/file"), "/file"); - Assert.assertEquals(BucketUtils.appendPathToDir("dir/", "/file"), "/file"); - Assert.assertEquals(BucketUtils.appendPathToDir("/path/to/dir", "anotherdir/file"), "/path/to/dir/anotherdir/file"); - - Assert.assertEquals(BucketUtils.appendPathToDir("file://dir", "file"), "file://dir/file"); - Assert.assertEquals(BucketUtils.appendPathToDir("hdfs://namenode:9000/dir", "file"), "hdfs://namenode:9000/dir/file"); - Assert.assertEquals(BucketUtils.appendPathToDir("gs://abucket/dir", "file"), "gs://abucket/dir/file"); - } - @Test public void testCopyLocal() throws IOException { final String src = publicTestDir+"empty.vcf"; diff --git a/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java index 34e74fe4823..98e4b3f65b0 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java @@ -167,6 +167,25 @@ private void innerTestGetPath(String s) throws IOException { Assert.assertTrue(size>0); } + @Test + public void testAppendPathToDir() throws Exception { + 
Assert.assertEquals(IOUtils.appendPathToDir("dir", "file"), "dir/file"); + Assert.assertEquals(IOUtils.appendPathToDir("dir/", "file"), "dir/file"); + Assert.assertEquals(IOUtils.appendPathToDir("dir", "/file"), "/file"); + Assert.assertEquals(IOUtils.appendPathToDir("dir/", "/file"), "/file"); + Assert.assertEquals(IOUtils.appendPathToDir("/path/to/dir", "anotherdir/file"), "/path/to/dir/anotherdir/file"); + + // hdfs: URI + Path tempPath = IOUtils.getPath(MiniClusterUtils.getWorkingDir(MiniClusterUtils.getMiniCluster()).toUri().toString()); + Assert.assertEquals(IOUtils.appendPathToDir(tempPath.toString(), "temp"), tempPath.toString()+"/temp"); + + // gs: URI + Assert.assertEquals(IOUtils.appendPathToDir("gs://abucket/dir", "file"), "gs://abucket/dir/file"); + + // file: URI + Assert.assertEquals(IOUtils.appendPathToDir("file:///dir", "file"), "file:///dir/file"); + } + @Test public void testSuccessfulCanReadFileCheck() { final File expectedFile = createTempFile("Utils-can-read-test",".txt"); From a11d7a25e4d9826f87206f6546b1f609579577c1 Mon Sep 17 00:00:00 2001 From: nalinigans Date: Sun, 19 Aug 2018 20:20:49 -0700 Subject: [PATCH 06/12] Migrate FeatureInput.java to use the new gendb://, gendb.gs:// and gendb.hdfs:// schemes --- .../hellbender/engine/FeatureDataSource.java | 13 ++++- .../hellbender/engine/FeatureInput.java | 3 +- .../engine/FeatureDataSourceUnitTest.java | 47 ++++++++++--------- .../engine/FeatureInputUnitTest.java | 14 +++++- 4 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index b43900e6968..c8e199613f9 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -296,7 +296,18 @@ public static boolean isGenomicsDBPath(final String path) { return getGenomicsDBPath(path) != 
null; } - public static String getGenomicsDBAbsolutePath(final String path) { + public static String getAbsolutePathWithGenDBScheme(final String path) { + String gendb_path = FeatureDataSource.getGenomicsDBAbsolutePath(path); + if (gendb_path == null) { + return null; + } else if (gendb_path.contains("://")) { + return FeatureDataSource.GENOMIC_DB_URI_SCHEME + "." + gendb_path; + } else { + return FeatureDataSource.GENOMIC_DB_URI_SCHEME + "://" + gendb_path; + } + } + + private static String getGenomicsDBAbsolutePath(final String path) { String genomicsdbPath = getGenomicsDBPath(path); if (genomicsdbPath != null) { return BucketUtils.makeFilePathAbsolute(getGenomicsDBPath(path)); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java index e332c6d05ee..d04baaec69d 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java @@ -5,7 +5,6 @@ import htsjdk.tribble.FeatureCodec; import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; @@ -243,7 +242,7 @@ public void setFeatureCodecClass(final Class> featureCodecCla */ private static String makeIntoAbsolutePath(final String filePath){ if(FeatureDataSource.isGenomicsDBPath(filePath)){ - return FeatureDataSource.getGenomicsDBAbsolutePath(filePath); + return FeatureDataSource.getAbsolutePathWithGenDBScheme(filePath); } else if (URI.create(filePath).getScheme() != null) { return IOUtils.getPath(filePath).toAbsolutePath().toUri().toString(); } else { diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java index 
9213f6b3348..ebcdeea99b5 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java @@ -66,34 +66,35 @@ public void testGetHeader() { @DataProvider(name = "GenomicsDBTestPathData") public Object[][] genomicsDBTestPathData() { return new Object[][]{ - //path, getGenomicsDBPath, isGenomicsDBPath - {null, null, false}, - {"", null, false}, - {"dfdfdf://fdfdf", null, false}, - {"fdfdf", null, false}, - {"gendbdfdfdf://fdfdf", null, false}, - {"gendb-dfdfdf://fdfdf", null, false}, - {"gendb-dfdfdf://", null, false}, - {"gendb", null, false}, - {"gendbdfdf", null, false}, - {"agendb://dfdfd", null, false}, - - {"gendb.dfdfdf://fdfdf", "dfdfdf://fdfdf", true}, - {"gendb://fdfdf", "fdfdf", true}, - {"gendb://", "", true}, - {"gendb:///fdfd", "/fdfd", true}, - {"gendb:///", "/", true}, - {"gendb.hdfs://this-node:9000/dir", "hdfs://this-node:9000/dir", true}, - {"gendb.gs://my-bucket/dir", "gs://my-bucket/dir", true}, - - {"gendb-hdfs://this-node:9000/dir", null, false}, - {"gendb-gs://this-node:9000/dir", null, false} + //path, getGenomicsDBPath, getAbsolutePathWithGenDBScheme, isGenomicsDBPath + {null, null, null, false}, + {"", null, null, false}, + {"dfdfdf://fdfdf", null, null, false}, + {"fdfdf", null, null, false}, + {"gendbdfdfdf://fdfdf", null, null, false}, + {"gendb-dfdfdf://fdfdf", null, null, false}, + {"gendb-dfdfdf://", null, null, false}, + {"gendb", null, null, false}, + {"gendbdfdf", null, null, false}, + {"agendb://dfdfd", null, null, false}, + + {"gendb.dfdfdf://fdfdf", "dfdfdf://fdfdf", "gendb://"+new File("dfdfdf://fdfdf").getAbsolutePath(), true}, //Not supported URI. 
+ {"gendb://fdfdf", "fdfdf", "gendb://"+new File("fdfdf").getAbsolutePath(), true}, + {"gendb://", "", "gendb://" + new File("").getAbsolutePath(), true}, + {"gendb:///fdfd", "/fdfd", "gendb:///fdfd", true}, + {"gendb:///", "/", "gendb:///", true}, + {"gendb.hdfs://this-node:9000/dir", "hdfs://this-node:9000/dir", "gendb.hdfs://this-node:9000/dir", true}, + {"gendb.gs://my-bucket/dir", "gs://my-bucket/dir", "gendb.gs://my-bucket/dir", true}, + + {"gendb-hdfs://this-node:9000/dir", null, null, false}, + {"gendb-gs://this-node:9000/dir", null, null, false} }; } @Test(dataProvider = "GenomicsDBTestPathData") - public void testGenomicsDBPathParsing(String path, String expectedPath, boolean expectedComparison){ + public void testGenomicsDBPathParsing(String path, String expectedPath, String gendbExpectedAbsolutePath, boolean expectedComparison){ Assert.assertEquals(FeatureDataSource.getGenomicsDBPath(path), expectedPath); + Assert.assertEquals(FeatureDataSource.getAbsolutePathWithGenDBScheme(path), gendbExpectedAbsolutePath); Assert.assertEquals(FeatureDataSource.isGenomicsDBPath(path), expectedComparison); } diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputUnitTest.java index 532ca6aa33a..f2f1aaca376 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputUnitTest.java @@ -73,7 +73,19 @@ public Object[][] genDbPathAndNameData() { {"myname:gendb://myJsons", "gendb://myJsons", "myname"}, {"myname,key1=value1:gendb://myJsons", "gendb://myJsons", "myname"}, {"myname//:gendb://myJsons", "gendb://myJsons", "myname//"}, - {"myname:gendb://", "gendb://", "myname"} + {"myname:gendb://", "gendb://", "myname"}, + + {"gendb.gs://myBucket/myJsons", "gendb.gs://myBucket/myJsons", "gendb.gs://myBucket/myJsons"}, + {"myname:gendb.gs://myJsons", "gendb.gs://myJsons", "myname"}, + 
{"myname,key1=value1:gendb.gs://myJsons", "gendb.gs://myJsons", "myname"}, + {"myname//:gendb.gs://myJsons", "gendb.gs://myJsons", "myname//"}, + {"myname:gendb.gs://", "gendb.gs://", "myname"}, + + {"gendb.hdfs://localhost/myJsons", "gendb.hdfs://localhost/myJsons", "gendb.hdfs://localhost/myJsons"}, + {"myname:gendb.hdfs://myJsons", "gendb.hdfs://myJsons", "myname"}, + {"myname,key1=value1:gendb.hdfs://myJsons", "gendb.hdfs://myJsons", "myname"}, + {"myname//:gendb.hdfs://myJsons", "gendb.hdfs://myJsons", "myname//"}, + {"myname:gendb.hdfs://", "gendb.hdfs://", "myname"} }; } From b8650defaee5b97ceab5157aebae448779f088b3 Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Thu, 23 Aug 2018 12:47:32 -0700 Subject: [PATCH 07/12] Add GenomicsDB tests for writing into and reading from GCS --- .../GenomicsDBImportIntegrationTest.java | 73 ++++++++++++++++--- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index a8b8767bcb0..2b533c8379a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.genomicsdb; +import com.intel.genomicsdb.GenomicsDBUtils; import com.intel.genomicsdb.model.GenomicsDBExportConfiguration; import com.intel.genomicsdb.reader.GenomicsDBFeatureReader; import htsjdk.samtools.SAMSequenceDictionary; @@ -17,6 +18,7 @@ import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.Main; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.IntervalUtils; import 
org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -32,9 +34,7 @@ import java.io.File; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; +import java.nio.file.*; import java.util.*; import java.util.stream.Collectors; @@ -399,9 +399,9 @@ private void writeToGenomicsDB(final List vcfInputs, final List intervals, @@ -754,12 +754,13 @@ private static GenomicsDBFeatureReader final String workspace, final String reference, final boolean produceGTField, final boolean sitesOnlyQuery) throws IOException { - GenomicsDBExportConfiguration.ExportConfiguration exportConfiguration = GenomicsDBExportConfiguration.ExportConfiguration.newBuilder() + String workspaceAbsPath = BucketUtils.makeFilePathAbsolute(workspace); + GenomicsDBExportConfiguration.ExportConfiguration exportConfiguration = GenomicsDBExportConfiguration.ExportConfiguration.newBuilder() .setWorkspace(workspace) .setReferenceGenome(reference) - .setVidMappingFile(new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath()) - .setCallsetMappingFile(new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath()) - .setVcfHeaderFilename(new File(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME).getAbsolutePath()) + .setVidMappingFile(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME)) + .setCallsetMappingFile(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME)) + .setVcfHeaderFilename(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME)) .setProduceGTField(produceGTField) .setSitesOnlyQuery(sitesOnlyQuery) .setGenerateArrayNameFromPartitionBounds(true) @@ -779,4 +780,56 @@ public void testYouCantWriteIntoAnExistingDirectory(){ final String workspace = createTempDir("workspace").getAbsolutePath(); 
writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); } + + private void cleanupGCSFolder(String path) { + try { + if (BucketUtils.isCloudStorageUrl(path)) { + Files.list(Paths.get(path)).forEach(f -> { + try { + Files.deleteIfExists(Paths.get(f.toString())); + } catch (DirectoryNotEmptyException e1) { + cleanupGCSFolder(f.toString()); + } catch (IOException e) { + // Ignore for now + } + }); + Files.deleteIfExists(BucketUtils.getPathOnGcs(path)); + } + } catch (IOException e) { + // Ignore for now. + } + } + + @Test(groups = {"bucket"}) + public void testWriteToAndQueryFromGCS() throws IOException { + String workspace = BucketUtils.randomRemotePath(getGCPTestInputPath(), "",""); + try { + Assert.assertNotNull(getGoogleServiceAccountKeyPath()); + System.gc(); + writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); + checkJSONFilesAreWritten(workspace); + checkGenomicsDBAgainstExpected(workspace, INTERVAL, COMBINED, b38_reference_20_21, true); + } catch (UserException e) { + // Don't run this test as GOOGLE_APPLICATION_CREDENTIALS is not set + } finally { + cleanupGCSFolder(workspace); + } + } + + @Test(groups = {"bucket"}, expectedExceptions = GenomicsDBImport.UnableToCreateGenomicsDBWorkspace.class) + public void testWriteToExistingGCSDirectory() throws IOException { + String workspace = BucketUtils.randomRemotePath(getGCPTestInputPath(), "",""); + try { + Assert.assertNotNull(getGoogleServiceAccountKeyPath()); + int rc = GenomicsDBUtils.createTileDBWorkspace(workspace, false); + Assert.assertEquals(rc, 0); + writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); + } catch (GenomicsDBImport.UnableToCreateGenomicsDBWorkspace e1) { + throw e1; + } catch (UserException e) { + // Don't run this test as GOOGLE_APPLICATION_CREDENTIALS is not set + } finally { + cleanupGCSFolder(workspace); + } + } } From d430bbdb0f87644ed14625639c37000ad83ca629 Mon Sep 17 00:00:00 2001 From: nalinigans Date: Sat, 1 Sep 2018 16:28:40 -0700 
Subject: [PATCH 08/12] Try test without gc --- .../tools/genomicsdb/GenomicsDBImportIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 2b533c8379a..9c0f6294287 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -805,7 +805,7 @@ public void testWriteToAndQueryFromGCS() throws IOException { String workspace = BucketUtils.randomRemotePath(getGCPTestInputPath(), "",""); try { Assert.assertNotNull(getGoogleServiceAccountKeyPath()); - System.gc(); + // System.gc(); writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); checkJSONFilesAreWritten(workspace); checkGenomicsDBAgainstExpected(workspace, INTERVAL, COMBINED, b38_reference_20_21, true); From cddf276f525ecbd692ae68804da9fa708d73d41f Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Sun, 9 Sep 2018 21:40:23 -0400 Subject: [PATCH 09/12] Refactor gendb URI processing to IOUtils and cleanup GenomicsDBTestUtils GCS specific tests --- .../hellbender/engine/FeatureDataSource.java | 94 ++++--------------- .../hellbender/engine/FeatureInput.java | 4 +- .../tools/genomicsdb/GenomicsDBImport.java | 5 +- .../hellbender/utils/io/IOUtils.java | 93 ++++++++++++++++++ .../engine/FeatureDataSourceUnitTest.java | 37 +------- .../GenomicsDBImportIntegrationTest.java | 52 ++-------- .../hellbender/utils/io/IOUtilsUnitTest.java | 38 ++++++++ .../testutils/GenomicsDBTestUtils.java | 4 +- 8 files changed, 164 insertions(+), 163 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index 
c8e199613f9..f0f76e74606 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -26,13 +26,12 @@ import java.io.File; import java.io.IOException; import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; import java.nio.file.Path; import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Enables traversals and queries over sources of Features, which are metadata associated with a location @@ -63,16 +62,6 @@ public final class FeatureDataSource implements GATKDataSource, AutoCloseable { private static final Logger logger = LogManager.getLogger(FeatureDataSource.class); - /** - * Schemes starting with gendb could be GenomicsDB paths - */ - public static final String GENOMIC_DB_URI_SCHEME = "gendb"; - - /** - * Patterns identifying GenomicsDB paths - */ - private static final Pattern genomicsdb_uri_pattern = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(.?)(.*)(://)(.*)"); - /** * Feature reader used to retrieve records from our file */ @@ -266,7 +255,7 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook // a query by interval is attempted. 
this.featureReader = getFeatureReader(featureInput, targetFeatureType, cloudWrapper, cloudIndexWrapper, reference); - if (isGenomicsDBPath(featureInput.getFeaturePath())) { + if (IOUtils.isGenomicsDBPath(featureInput.getFeaturePath())) { //genomics db uri's have no associated index file to read from, but they do support random access this.hasIndex = false; this.supportsRandomAccess = true; @@ -288,72 +277,13 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook this.queryLookaheadBases = queryLookaheadBases; } - /** - * @param path String containing the path to test - * @return true if path represent a GenomicsDB URI, otherwise false - */ - public static boolean isGenomicsDBPath(final String path) { - return getGenomicsDBPath(path) != null; - } - - public static String getAbsolutePathWithGenDBScheme(final String path) { - String gendb_path = FeatureDataSource.getGenomicsDBAbsolutePath(path); - if (gendb_path == null) { - return null; - } else if (gendb_path.contains("://")) { - return FeatureDataSource.GENOMIC_DB_URI_SCHEME + "." + gendb_path; - } else { - return FeatureDataSource.GENOMIC_DB_URI_SCHEME + "://" + gendb_path; - } - } - - private static String getGenomicsDBAbsolutePath(final String path) { - String genomicsdbPath = getGenomicsDBPath(path); - if (genomicsdbPath != null) { - return BucketUtils.makeFilePathAbsolute(getGenomicsDBPath(path)); - } else { - return null; - } - } - - /** - * If path is prefaced with gendb:// or gendb.://, returns a path acceptable - * by GenomicsDB. Otherwise, returns null. 
- * - * @param path gendb paths - * following are valid gendb URI examples - * gendb://my_folder - * gendb:///my_abs_folder - * gendb.hdfs://name_node/my_folder - * gendb.gs://my_bucket/my_folder - * gendb.s3://my_bucket/my_folder - * @return GenomicsDB acceptable path or null - */ - public static String getGenomicsDBPath(final String path) { - // genomicsdb_uri_pattern = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(.?)(.*)(://)(.*)"); - // gendb.supportedCloudURI:// - // ^^group2^^ ^^group4^^ - String genomicsdbPath = null; - if (path != null && path.startsWith(GENOMIC_DB_URI_SCHEME)) { // Check if path starts with "gendb" - Matcher matcher = genomicsdb_uri_pattern.matcher(path); - if (matcher.find() && !matcher.group(3).isEmpty()) { // path contains "://" - if (!matcher.group(1).isEmpty() && matcher.group(1).equals(".")) { // path has a period after gendb, so it is a URI - genomicsdbPath = matcher.group(2) + matcher.group(3) + matcher.group(4); - } else if (matcher.group(2).isEmpty()) { - genomicsdbPath = matcher.group(4); - } - } - } - return genomicsdbPath; - } - @SuppressWarnings("unchecked") private static FeatureReader getFeatureReader(final FeatureInput featureInput, final Class targetFeatureType, final Function cloudWrapper, final Function cloudIndexWrapper, final Path reference) { - if (isGenomicsDBPath(featureInput.getFeaturePath())) { + if (IOUtils.isGenomicsDBPath(featureInput.getFeaturePath())) { try { if (reference == null) { throw new UserException.MissingReference("You must provide a reference if you want to load from GenomicsDB"); @@ -422,20 +352,30 @@ private static FeatureReader getFeatureReader(final Featu } } + private static void verifyPathsAreReadable(final String ... 
paths) { + for (String path : paths) { + IOUtils.assertFileIsReadable(IOUtils.getPath(path)); + } + } + + private static FeatureReader getGenomicsDBFeatureReader(final String path, final File reference) { - final String workspace = getGenomicsDBAbsolutePath(path); + final String workspace = IOUtils.getGenomicsDBAbsolutePath(path); if (workspace == null) { - throw new IllegalArgumentException("Trying to create a GenomicsDBReader from non-GenomicsDB input"); + throw new IllegalArgumentException("Trying to create a GenomicsDBReader from non-GenomicsDB input path " + path); + } else if (Files.notExists(IOUtils.getPath(workspace))) { + throw new UserException("GenomicsDB workspace " + path + " does not exist"); } final String callsetJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); final String vidmapJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); final String vcfHeader = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); - final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder = - createExportConfiguration(reference, workspace, callsetJson, vidmapJson, vcfHeader); + verifyPathsAreReadable(callsetJson, vidmapJson, vcfHeader); try { + final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder = + createExportConfiguration(reference, workspace, callsetJson, vidmapJson, vcfHeader); return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new BCF2Codec(), Optional.empty()); } catch (final IOException e) { throw new UserException("Couldn't create GenomicsDBFeatureReader", e); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java index d04baaec69d..9c100808717 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java +++ 
b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java @@ -241,8 +241,8 @@ public void setFeatureCodecClass(final Class> featureCodecCla * creates a name from the given filePath by finding the absolute path of the given input */ private static String makeIntoAbsolutePath(final String filePath){ - if(FeatureDataSource.isGenomicsDBPath(filePath)){ - return FeatureDataSource.getAbsolutePathWithGenDBScheme(filePath); + if(IOUtils.isGenomicsDBPath(filePath)){ + return IOUtils.getAbsolutePathWithGenDBScheme(filePath); } else if (URI.create(filePath).getScheme() != null) { return IOUtils.getPath(filePath).toAbsolutePath().toUri().toString(); } else { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java index 53e7079cc50..4855eff24d3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java @@ -470,6 +470,7 @@ public void onTraversalStart() { logger.info("Vid Map JSON file will be written to " + vidMapJSONFile); logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile); logger.info("Complete VCF Header will be written to " + vcfHeaderFile); + logger.info("Importing to array - " + workspaceDir + "/" + GenomicsDBConstants.DEFAULT_ARRAY_NAME); initializeInputPreloadExecutorService(); } @@ -654,10 +655,10 @@ private AbstractFeatureReader getReaderFromPath(fi private String overwriteOrCreateWorkspace() { String workspaceDir = BucketUtils.makeFilePathAbsolute(workspace); // From JavaDoc for GenomicsDBUtils.createTileDBWorkspace - // returnCode = 0 : OK + // returnCode = 0 : OK. If overwriteExistingWorkspace is true and the workspace exists, it is deleted first. 
// returnCode = -1 : path was not a directory // returnCode = -2 : failed to create workspace - // returnCode = 1 : existing directory, nothing changed + // returnCode = 1 : if overwriteExistingWorkspace is false, return 1 if directory already exists int returnCode = GenomicsDBUtils.createTileDBWorkspace(workspaceDir, overwriteExistingWorkspace); if (returnCode < 0) { throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + workspace); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java index 2cf5c5fb4ae..ada213dcbcc 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java @@ -27,6 +27,8 @@ import java.nio.file.*; import java.util.Arrays; import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import java.util.zip.ZipException; @@ -38,6 +40,16 @@ public final class IOUtils { // see https://support.hdfgroup.org/HDF5/doc/H5.format.html private static final byte hdf5HeaderSignature[] = { (byte) 0x89, 'H', 'D', 'F', '\r', '\n', (byte) 0x1A, '\n' }; + /** + * Schemes starting with gendb could be GenomicsDB paths + */ + public static final String GENOMIC_DB_URI_SCHEME = "gendb"; + + /** + * Patterns identifying GenomicsDB paths + */ + private static final Pattern GENOMICSDB_URI_PATTERN = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(\\.?)(.*)(://)(.*)"); + /** * Returns true if the file's extension is CRAM. */ @@ -677,4 +689,85 @@ public static final String urlDecode(final String string) { throw new UserException("Could not decode sample name", ex); } } + + /** + * Check if a given path represents GenomicsDB URI. 
+ * + * @param path String containing the path to test + * @return true if path represents a GenomicsDB URI, otherwise false + */ + public static boolean isGenomicsDBPath(final String path) { + return getGenomicsDBPath(path) != null; + } + + /** + * Get the GenomicsDB equivalent absolute URL for a given path + * + * @param gendbPath String representing legal gendb URI + * @return absolute gendb URI to the path + */ + public static String getAbsolutePathWithGenDBScheme(final String gendbPath) { + String path = getGenomicsDBAbsolutePath(gendbPath); + if (path == null) { + return null; + } else if (path.contains("://")) { + return GENOMIC_DB_URI_SCHEME + "." + path; + } else { + return GENOMIC_DB_URI_SCHEME + "://" + path; + } + } + + /** + * Gets the absolute Path for a GenomicsDB path + * + * @param gendbPath gendb URI + * @return absolute name to the given GenomicsDB path + * @see #getGenomicsDBPath(String) + */ + public static String getGenomicsDBAbsolutePath(final String gendbPath) { + String path = getGenomicsDBPath(gendbPath); + if (path == null) { + return null; + } else if (path.contains("://")) { + return path; + } else { + return new File(path).getAbsolutePath(); + } + } + + /** + * If path is prefaced with gendb:// or gendb.CloudURIScheme://, this method returns an absolute path acceptable + * by GenomicsDB by stripping off gendb:// for files or gendb. for Cloud URIs respectively . + * Otherwise, returns null. + * + * @param path GenomicsDB paths that start with gendb:// or gendb.CloudURIScheme://
+ * Following are valid gendb URI examples + *
    + *
  • gendb://my_folder + *
  • gendb:///my_abs_folder + *
  • gendb.hdfs://name_node/my_folder + *
  • gendb.gs://my_bucket/my_folder + *
  • gendb.s3://my_bucket/my_folder + *
+ * @return Valid GenomicsDB path or null + */ + public static String getGenomicsDBPath(final String path) { + // GENOMICSDB_URI_PATTERN = Pattern.compile("^" + GENOMIC_DB_URI_SCHEME + "(\\.?)(.*)(://)(.*)"); + // gendb.supportedCloudURI:// + // ^^group2^^ ^^group4^^ + String genomicsdbPath = null; + if (path != null && path.startsWith(GENOMIC_DB_URI_SCHEME)) { // Check if path starts with "gendb" + Matcher matcher = GENOMICSDB_URI_PATTERN.matcher(path); + if (matcher.find() && !matcher.group(3).isEmpty()) { // path contains "://" + if (!matcher.group(1).isEmpty()) { // path has a period after gendb, so it is a URI + if (!matcher.group(2).isEmpty()) { //path has a scheme, so it is valid URI for GenomicsDB + genomicsdbPath = matcher.group(2) + matcher.group(3) + matcher.group(4); + } + } else if (matcher.group(2).isEmpty()) { + genomicsdbPath = matcher.group(4); + } + } + } + return genomicsdbPath; + } } diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java index ebcdeea99b5..7424c1523c4 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureDataSourceUnitTest.java @@ -6,10 +6,10 @@ import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; -import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -63,41 +63,6 @@ public void testGetHeader() { Assert.assertTrue(header instanceof VCFHeader, "Header for " + QUERY_TEST_VCF.getAbsolutePath() + " 
not a VCFHeader"); } - @DataProvider(name = "GenomicsDBTestPathData") - public Object[][] genomicsDBTestPathData() { - return new Object[][]{ - //path, getGenomicsDBPath, getAbsolutePathWithGenDBScheme, isGenomicsDBPath - {null, null, null, false}, - {"", null, null, false}, - {"dfdfdf://fdfdf", null, null, false}, - {"fdfdf", null, null, false}, - {"gendbdfdfdf://fdfdf", null, null, false}, - {"gendb-dfdfdf://fdfdf", null, null, false}, - {"gendb-dfdfdf://", null, null, false}, - {"gendb", null, null, false}, - {"gendbdfdf", null, null, false}, - {"agendb://dfdfd", null, null, false}, - - {"gendb.dfdfdf://fdfdf", "dfdfdf://fdfdf", "gendb://"+new File("dfdfdf://fdfdf").getAbsolutePath(), true}, //Not supported URI. - {"gendb://fdfdf", "fdfdf", "gendb://"+new File("fdfdf").getAbsolutePath(), true}, - {"gendb://", "", "gendb://" + new File("").getAbsolutePath(), true}, - {"gendb:///fdfd", "/fdfd", "gendb:///fdfd", true}, - {"gendb:///", "/", "gendb:///", true}, - {"gendb.hdfs://this-node:9000/dir", "hdfs://this-node:9000/dir", "gendb.hdfs://this-node:9000/dir", true}, - {"gendb.gs://my-bucket/dir", "gs://my-bucket/dir", "gendb.gs://my-bucket/dir", true}, - - {"gendb-hdfs://this-node:9000/dir", null, null, false}, - {"gendb-gs://this-node:9000/dir", null, null, false} - }; - } - - @Test(dataProvider = "GenomicsDBTestPathData") - public void testGenomicsDBPathParsing(String path, String expectedPath, String gendbExpectedAbsolutePath, boolean expectedComparison){ - Assert.assertEquals(FeatureDataSource.getGenomicsDBPath(path), expectedPath); - Assert.assertEquals(FeatureDataSource.getAbsolutePathWithGenDBScheme(path), gendbExpectedAbsolutePath); - Assert.assertEquals(FeatureDataSource.isGenomicsDBPath(path), expectedComparison); - } - @Test public void testGetSequenceDictionary() { try (FeatureDataSource featureSource = new FeatureDataSource<>(QUERY_TEST_VCF, "CustomName")) { diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 9c0f6294287..9d543ce235d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -781,55 +781,19 @@ public void testYouCantWriteIntoAnExistingDirectory(){ writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); } - private void cleanupGCSFolder(String path) { - try { - if (BucketUtils.isCloudStorageUrl(path)) { - Files.list(Paths.get(path)).forEach(f -> { - try { - Files.deleteIfExists(Paths.get(f.toString())); - } catch (DirectoryNotEmptyException e1) { - cleanupGCSFolder(f.toString()); - } catch (IOException e) { - // Ignore for now - } - }); - Files.deleteIfExists(BucketUtils.getPathOnGcs(path)); - } - } catch (IOException e) { - // Ignore for now. 
- } - } - @Test(groups = {"bucket"}) public void testWriteToAndQueryFromGCS() throws IOException { - String workspace = BucketUtils.randomRemotePath(getGCPTestInputPath(), "",""); - try { - Assert.assertNotNull(getGoogleServiceAccountKeyPath()); - // System.gc(); - writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); - checkJSONFilesAreWritten(workspace); - checkGenomicsDBAgainstExpected(workspace, INTERVAL, COMBINED, b38_reference_20_21, true); - } catch (UserException e) { - // Don't run this test as GOOGLE_APPLICATION_CREDENTIALS is not set - } finally { - cleanupGCSFolder(workspace); - } + final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", ""); + writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); + checkJSONFilesAreWritten(workspace); + checkGenomicsDBAgainstExpected(workspace, INTERVAL, COMBINED, b38_reference_20_21, true); } @Test(groups = {"bucket"}, expectedExceptions = GenomicsDBImport.UnableToCreateGenomicsDBWorkspace.class) public void testWriteToExistingGCSDirectory() throws IOException { - String workspace = BucketUtils.randomRemotePath(getGCPTestInputPath(), "",""); - try { - Assert.assertNotNull(getGoogleServiceAccountKeyPath()); - int rc = GenomicsDBUtils.createTileDBWorkspace(workspace, false); - Assert.assertEquals(rc, 0); - writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); - } catch (GenomicsDBImport.UnableToCreateGenomicsDBWorkspace e1) { - throw e1; - } catch (UserException e) { - // Don't run this test as GOOGLE_APPLICATION_CREDENTIALS is not set - } finally { - cleanupGCSFolder(workspace); - } + final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", ""); + int rc = GenomicsDBUtils.createTileDBWorkspace(workspace, false); + Assert.assertEquals(rc, 0); + writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); } } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java 
b/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java index 8dc5598ce90..638616277ac 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/io/IOUtilsUnitTest.java @@ -389,4 +389,42 @@ public void testIsHDF5File(final String filePath, final boolean expected) { Assert.assertEquals(IOUtils.isHDF5File(testPath), expected); } + @DataProvider(name = "GenomicsDBTestPathData") + public Object[][] genomicsDBTestPathData() { + return new Object[][]{ + //path, getGenomicsDBPath, getAbsolutePathWithGenDBScheme, isGenomicsDBPath + {null, null, null, false}, + {"", null, null, false}, + {"dfdfdf://fdfdf", null, null, false}, + {"fdfdf", null, null, false}, + {"gendbdfdfdf://fdfdf", null, null, false}, + {"gendb-dfdfdf://fdfdf", null, null, false}, + {"gendb-dfdfdf://", null, null, false}, + {"gendb", null, null, false}, + {"gendbdfdf", null, null, false}, + {"agendb://dfdfd", null, null, false}, + {"gendb.://fdfdf", null, null, false}, + {"gendb.", null, null, false}, + + {"gendb.dfdfdf://fdfdf", "dfdfdf://fdfdf", "gendb.dfdfdf://fdfdf", true}, + {"gendb://fdfdf", "fdfdf", "gendb://" + new File("fdfdf").getAbsolutePath(), true}, + {"gendb://", "", "gendb://" + new File("").getAbsolutePath(), true}, + {"gendb:///fdfd", "/fdfd", "gendb:///fdfd", true}, + {"gendb:///", "/", "gendb:///", true}, + {"gendb.hdfs://this-node:9000/dir", "hdfs://this-node:9000/dir", "gendb.hdfs://this-node:9000/dir", true}, + {"gendb.gs://my-bucket/dir", "gs://my-bucket/dir", "gendb.gs://my-bucket/dir", true}, + {"gendb.s3://my-bucket/dir", "s3://my-bucket/dir", "gendb.s3://my-bucket/dir", true}, + + {"gendb-hdfs://this-node:9000/dir", null, null, false}, + {"gendb-gs://this-node:9000/dir", null, null, false} + }; + } + + @Test(dataProvider = "GenomicsDBTestPathData") + public void testGenomicsDBPathParsing(String path, String expectedPath, String gendbExpectedAbsolutePath, boolean 
expectedComparison) { + Assert.assertEquals(IOUtils.getGenomicsDBPath(path), expectedPath, "Got 1 "+IOUtils.getGenomicsDBPath(path)); + Assert.assertEquals(IOUtils.getAbsolutePathWithGenDBScheme(path), gendbExpectedAbsolutePath); + Assert.assertEquals(IOUtils.isGenomicsDBPath(path), expectedComparison, "Got 3 " + IOUtils.isGenomicsDBPath(path)); + } + } diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/GenomicsDBTestUtils.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/GenomicsDBTestUtils.java index 04c389bec9e..ddeb97a0238 100644 --- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/GenomicsDBTestUtils.java +++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/GenomicsDBTestUtils.java @@ -1,9 +1,9 @@ package org.broadinstitute.hellbender.testutils; import htsjdk.samtools.util.Locatable; -import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBImport; import org.broadinstitute.hellbender.utils.IntervalUtils; +import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; import java.util.Collections; @@ -22,7 +22,7 @@ private GenomicsDBTestUtils(){} * @return a string formatted as a genomicsDB uri pointing to the given workspace i.e "gendb:///pathTo/workspace */ public static String makeGenomicsDBUri(final File workspace){ - return FeatureDataSource.GENOMIC_DB_URI_SCHEME + "://" + workspace.getAbsolutePath(); + return IOUtils.GENOMIC_DB_URI_SCHEME + "://" + workspace.getAbsolutePath(); } /** From 35766654dabc69bc0b0f60b82b847e1c81b22265 Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Tue, 11 Sep 2018 13:24:26 -0700 Subject: [PATCH 10/12] Delete GCS folders recursively in GCS unit tests --- .../hellbender/utils/gcs/BucketUtils.java | 24 ++++++++++++++++- .../GenomicsDBImportIntegrationTest.java | 7 ++--- .../hellbender/utils/gcs/BucketUtilsTest.java | 26 +++++++++++++++++++ 3 files changed, 53 insertions(+), 4 
deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java index ddea418aade..c29a27120b0 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java @@ -28,7 +28,10 @@ import java.io.*; import java.nio.file.Files; import java.util.Arrays; +import java.util.Comparator; +import java.util.List; import java.util.UUID; +import java.util.stream.Collectors; /** * Utilities for dealing with google buckets. @@ -173,6 +176,21 @@ public static void deleteFile(String pathToDelete) throws IOException { } } + /** + * Delete rootPath recursively using nio2 + * @param rootPath is the file/directory to be deleted. rootPath can point to a File or a URI. + * @throws IOException + */ + public static void deleteRecursively(final String rootPath) throws IOException { + final List pathsToDelete = + Files.walk(IOUtils.getPath(rootPath)) + .sorted(Comparator.reverseOrder()) + .collect(Collectors.toList()); + for (java.nio.file.Path path : pathsToDelete) { + Files.deleteIfExists(path); + } + } + /** * Get a temporary file path based on the prefix and extension provided. 
* This file (and possible indexes associated with it) will be scheduled for deletion on shutdown @@ -209,7 +227,11 @@ public static void deleteOnExit(String fileToDelete){ @Override public void run() { try { - deleteFile(fileToDelete); + if (Files.isDirectory(IOUtils.getPath(fileToDelete))) { + deleteRecursively(fileToDelete); + } else { + deleteFile(fileToDelete); + } } catch (IOException e) { logger.warn("Failed to delete file: " + fileToDelete+ ".", e); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 3f9afec99c5..4b8a8c36140 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -18,7 +18,6 @@ import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.Main; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; -import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -788,7 +787,8 @@ public void testYouCantWriteIntoAnExistingDirectory(){ @Test(groups = {"bucket"}) public void testWriteToAndQueryFromGCS() throws IOException { - final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", ""); + final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", "") + "/"; + BucketUtils.deleteOnExit(workspace); writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); checkJSONFilesAreWritten(workspace); checkGenomicsDBAgainstExpected(workspace, INTERVAL, COMBINED, b38_reference_20_21, true); @@ -796,7 +796,8 @@ public void testWriteToAndQueryFromGCS() throws 
IOException { @Test(groups = {"bucket"}, expectedExceptions = GenomicsDBImport.UnableToCreateGenomicsDBWorkspace.class) public void testWriteToExistingGCSDirectory() throws IOException { - final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", ""); + final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", "") + "/"; + BucketUtils.deleteOnExit(workspace); int rc = GenomicsDBUtils.createTileDBWorkspace(workspace, false); Assert.assertEquals(rc, 0); writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java index 782f2e1039d..8ec1eaa1425 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/gcs/BucketUtilsTest.java @@ -3,11 +3,13 @@ import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration; import htsjdk.samtools.util.IOUtil; import java.net.URI; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.testutils.MiniClusterUtils; import org.broadinstitute.hellbender.utils.config.ConfigFactory; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -171,4 +173,28 @@ public void testDirSizeGCS() throws IOException, GeneralSecurityException { Assert.assertFalse(BucketUtils.fileExists(intermediate)); } + @Test + public void testDeleteRecursively() throws IOException { + final File dir = Files.createTempDirectory("test-dir").normalize().toFile(); + final File file = new File(dir, "new-file"); + Assert.assertTrue(file.createNewFile()); + Assert.assertTrue(file.exists()); + BucketUtils.deleteRecursively(dir.toString()); + Assert.assertFalse(dir.exists()); + } + + 
@Test(groups={"bucket"}) + public void testDeleteRecursivelyGCS() throws IOException { + final String gcsFolder = BucketUtils.randomRemotePath(getGCPTestStaging(), "test-dir", ""); + final Path gcsFolderPath = IOUtils.getPath(gcsFolder+"/"); + Files.createDirectory(gcsFolderPath); + Assert.assertTrue(Files.exists(gcsFolderPath)); + Assert.assertTrue(Files.isDirectory(gcsFolderPath)); + final Path gcsFilePath = Files.createFile(IOUtils.getPath(gcsFolder+"/"+"new-file")); + Assert.assertTrue(Files.exists(gcsFilePath)); + BucketUtils.deleteRecursively(gcsFolderPath.toUri().toString()); + Assert.assertFalse(Files.exists(gcsFilePath)); + Assert.assertFalse(Files.exists(IOUtils.getPath(gcsFolder))); + } + } From f3caceae074a7c56feb4909a9baa58ad7b389a55 Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Wed, 19 Sep 2018 15:12:23 -0700 Subject: [PATCH 11/12] Debug Cloud Tests --- .../GenomicsDBImportIntegrationTest.java | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 4b8a8c36140..66129189e7d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -1,5 +1,7 @@ package org.broadinstitute.hellbender.tools.genomicsdb; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.intel.genomicsdb.GenomicsDBUtils; import com.intel.genomicsdb.model.GenomicsDBExportConfiguration; import com.intel.genomicsdb.reader.GenomicsDBFeatureReader; @@ -786,6 +788,37 @@ public void testYouCantWriteIntoAnExistingDirectory(){ } @Test(groups = {"bucket"}) + public void testYDebugGenomicsDBSupport() throws IOException { + String creds = 
System.getenv("GOOGLE_APPLICATION_CREDENTIALS"); + Assert.assertNotNull(creds); + System.out.println("GOOG_CRED= " + creds); + + String hellbender_creds = System.getenv("HELLBENDER_JSON_SERVICE_ACCOUNT_KEY"); + Assert.assertNotNull(hellbender_creds); + System.out.print("HELLBENDER_CRED=" + hellbender_creds); + + if (new File(creds).getAbsolutePath().equals(new File(hellbender_creds).getAbsolutePath())) { + System.out.println("Both creds identical"); + } else { + System.out.println("creds:"+new File(creds).getAbsolutePath()); + System.out.println("hellbender_creds:"+new File(hellbender_creds).getAbsolutePath()); + } + + byte[] mapData = Files.readAllBytes(IOUtils.getPath(System.getenv("GOOGLE_APPLICATION_CREDENTIALS"))); + HashMap myMap = new HashMap(); + + ObjectMapper objectMapper = new ObjectMapper(); + myMap = objectMapper.readValue(mapData, new TypeReference>() {}); + myMap.forEach((key, value) -> { + if (key.contains("private") || key.contains("id")) { + System.out.println("Key=" + key); + } else { + System.out.println("Key=" + key + " Value=" + value); + } + }); + } + + /*@Test(groups = {"bucket"}) public void testWriteToAndQueryFromGCS() throws IOException { final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", "") + "/"; BucketUtils.deleteOnExit(workspace); @@ -801,5 +834,5 @@ public void testWriteToExistingGCSDirectory() throws IOException { int rc = GenomicsDBUtils.createTileDBWorkspace(workspace, false); Assert.assertEquals(rc, 0); writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace, 0, false, 0, 1); - } + }*/ } From ace4d518525e99583fe62971c0191ef9e63537dc Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Wed, 19 Sep 2018 16:50:01 -0700 Subject: [PATCH 12/12] Try trigger another build --- .../tools/genomicsdb/GenomicsDBImportIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 66129189e7d..c5fc685fb71 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -788,7 +788,7 @@ public void testYouCantWriteIntoAnExistingDirectory(){ } @Test(groups = {"bucket"}) - public void testYDebugGenomicsDBSupport() throws IOException { + public void testDebugGenomicsDBSupport() throws IOException { String creds = System.getenv("GOOGLE_APPLICATION_CREDENTIALS"); Assert.assertNotNull(creds); System.out.println("GOOG_CRED= " + creds);