From 65002e273a9838e54d214f27955175cb2bc2abf4 Mon Sep 17 00:00:00 2001 From: Jonn Smith Date: Wed, 29 Aug 2018 14:05:27 -0400 Subject: [PATCH 1/2] Changed readme parsing to look for manifest instead. Added in a unit test to check the version regex. --- .../dataSources/DataSourceUtils.java | 97 ++++++++++--------- .../dataSources/DataSourceUtilsUnitTest.java | 80 ++++++++++++++- 2 files changed, 127 insertions(+), 50 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java index 531ed0e0545..c2075e6a243 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java @@ -48,12 +48,14 @@ private DataSourceUtils() {} private static final PathMatcher configFileMatcher = FileSystems.getDefault().getPathMatcher("glob:**/*.config"); - private static final String README_VERSION_LINE_START = "Version:"; - private static final String README_SOURCE_LINE_START = "Source:"; - private static final String README_ALT_SOURCE_LINE_START = "Alternate Source:"; - private static final Pattern VERSION_PATTERN = Pattern.compile(README_VERSION_LINE_START + "\\s+(\\d+)\\.(\\d+)\\.(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)"); - private static final Pattern SOURCE_PATTERN = Pattern.compile(README_SOURCE_LINE_START + "\\s+(ftp.*)"); - private static final Pattern ALT_SOURCE_PATTERN = Pattern.compile(README_ALT_SOURCE_LINE_START + "\\s+(gs.*)"); + @VisibleForTesting + static final String MANIFEST_VERSION_LINE_START = "Version:"; + private static final String MANIFEST_SOURCE_LINE_START = "Source:"; + private static final String MANIFEST_ALT_SOURCE_LINE_START = "Alternate Source:"; + @VisibleForTesting + static final Pattern VERSION_PATTERN = Pattern.compile(MANIFEST_VERSION_LINE_START + "\\s+(\\d+)\\.(\\d+)\\.(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)(.*)"); + private static final Pattern SOURCE_PATTERN = Pattern.compile(MANIFEST_SOURCE_LINE_START + "\\s+(ftp.*)"); + private static final Pattern ALT_SOURCE_PATTERN = Pattern.compile(MANIFEST_ALT_SOURCE_LINE_START + "\\s+(gs.*)"); // Track our minimum version number here: @VisibleForTesting @@ -63,19 +65,18 @@ private DataSourceUtils() {} @VisibleForTesting static final int MIN_YEAR_RELEASED = 2018; @VisibleForTesting - static final int MIN_MONTH_RELEASED = 6; + static final int MIN_MONTH_RELEASED = 8; @VisibleForTesting - static final int MIN_DAY_RELEASED = 15; + static final int MIN_DAY_RELEASED = 29; //================================================================================================================== // Public Static Members: /** The minimum version of the data sources required for funcotator to run. */ public static final String CURRENT_MINIMUM_DATA_SOURCE_VERSION = String.format("v%d.%d.%d%02d%02d", MIN_MAJOR_VERSION_NUMBER, MIN_MINOR_VERSION_NUMBER, MIN_YEAR_RELEASED, MIN_MONTH_RELEASED, MIN_DAY_RELEASED); - public static final String README_FILE_NAME = "README.txt"; + public static final String MANIFEST_FILE_NAME = "MANIFEST.txt"; public static final String DATA_SOURCES_FTP_PATH = "ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/"; public static final String DATA_SOURCES_BUCKET_PATH = "gs://broad-public-datasets/funcotator/"; - public static final String CONFIG_FILE_FIELD_NAME_NAME = "name"; public static final String CONFIG_FILE_FIELD_NAME_VERSION = "version"; public static final String CONFIG_FILE_FIELD_NAME_SRC_FILE = "src_file"; @@ -100,7 +101,7 @@ private DataSourceUtils() {} * @param dataSourceDirectories A {@link List} of {@link Path} to the directories containing our data sources. Must not be {@code null}. * @return The contents of the config files for each of the data sources found in the given {@code dataSourceDirectories}. */ - public static Map getAndValidateDataSourcesFromPaths(final String refVersion, + public static Map getAndValidateDataSourcesFromPaths( final String refVersion, final List dataSourceDirectories) { Utils.nonNull(refVersion); Utils.nonNull(dataSourceDirectories); @@ -115,13 +116,13 @@ public static Map getAndValidateDataSourcesFromPaths(final Str logger.info("Initializing data sources from directory: " + pathString); - final Path p = IOUtils.getPath(pathString); - if ( !isValidDirectory(p) ) { - throw new UserException("ERROR: Given data source path is not a valid directory: " + p.toUri().toString()); + final Path pathToDatasources = IOUtils.getPath(pathString); + if ( !isValidDirectory(pathToDatasources) ) { + throw new UserException("ERROR: Given data source path is not a valid directory: " + pathToDatasources.toUri()); } // Log information from the datasources directory so we can have a record of what we're using: - final boolean isGoodVersionOfDataSources = logDataSourcesInfo(p); + final boolean isGoodVersionOfDataSources = logDataSourcesInfo(pathToDatasources); if ( !isGoodVersionOfDataSources ) { continue; @@ -129,7 +130,7 @@ public static Map getAndValidateDataSourcesFromPaths(final Str // Now that we have a valid directory, we need to grab a list of sub-directories in it: try { - for ( final Path dataSourceTopDir : Files.list(p).filter(DataSourceUtils::isValidDirectory).collect(Collectors.toSet()) ) { + for ( final Path dataSourceTopDir : Files.list(pathToDatasources).filter(DataSourceUtils::isValidDirectory).collect(Collectors.toSet()) ) { // Get the path that corresponds to our reference version: final Path dataSourceDir = dataSourceTopDir.resolve(refVersion); @@ -166,7 +167,7 @@ public static Map getAndValidateDataSourcesFromPaths(final Str } } catch (final IOException ex) { - throw new GATKException("Unable to read contents of: " + p.toUri().toString(), ex); + throw new GATKException("Unable to read contents of: " + pathToDatasources.toUri().toString(), ex); } } @@ -461,12 +462,14 @@ private static Properties readConfigFileProperties(final Path configFilePath) { * user can create their own data sources directory, which may not contain the metadata we seek. * * NOTE: The README file in a Data Sources directory is assumed to have the following properties: - * - Its name must be {@link #README_FILE_NAME} - * - It must contain a line starting with {@link #README_VERSION_LINE_START} containing an alphanumeric string containing the version number information. + * - Its name must be {@link #MANIFEST_FILE_NAME} + * - It must contain a line starting with {@link #MANIFEST_VERSION_LINE_START} containing an alphanumeric string containing the version number information. * - This version information takes the form of: - * [MAJOR_VERSION].[MINOR_VERSION].[RELEASE_YEAR][RELEASE_MONTH][RELEASE_DAY] + * [MAJOR_VERSION].[MINOR_VERSION].[RELEASE_YEAR][RELEASE_MONTH][RELEASE_DAY][VERSION_DECORATOR]? * e.g. - * 1.1.20180204 (version 1.1 released Feb. 2, 2018) + * 1.1.20180204 (version 1.1 released Feb. 2, 2018) + * 4.2.20480608somatic (version 4.2 released June 6, 2048 - somatic data sources) + * 1.7.20190918X (version 1.7 released Sept. 18, 2048 - X data sources) * * * @param dataSourcesPath {@link Path} to a Data Sources directory to check. @@ -476,34 +479,36 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { boolean dataSourcesPathIsAcceptable = true; - final Path readmePath = dataSourcesPath.resolve(IOUtils.getPath(README_FILE_NAME)); + final Path manifestPath = dataSourcesPath.resolve(IOUtils.getPath(MANIFEST_FILE_NAME)); String version = null; - if ( Files.exists(readmePath) && Files.isRegularFile(readmePath) && Files.isReadable(readmePath) ) { + if ( Files.exists(manifestPath) && Files.isRegularFile(manifestPath) && Files.isReadable(manifestPath) ) { - try ( final BufferedReader reader = Files.newBufferedReader(readmePath) ) { + try ( final BufferedReader reader = Files.newBufferedReader(manifestPath) ) { - Integer versionMajor = null; - Integer versionMinor = null; - Integer versionYear = null; - Integer versionMonth = null; - Integer versionDay = null; - String source = null; - String alternateSource = null; + Integer versionMajor = null; + Integer versionMinor = null; + Integer versionYear = null; + Integer versionMonth = null; + Integer versionDay = null; + String versionDecorator = null; + String source = null; + String alternateSource = null; // Get the info from our README file: String line = reader.readLine(); while ((line != null) && ((version == null) || (source == null) || (alternateSource == null))) { - if (version == null && line.startsWith(README_VERSION_LINE_START)) { - final Matcher m = VERSION_PATTERN.matcher(line); - if ( m.matches() ) { - versionMajor = Integer.valueOf(m.group(1)); - versionMinor = Integer.valueOf(m.group(2)); - versionYear = Integer.valueOf(m.group(3)); - versionMonth = Integer.valueOf(m.group(4)); - versionDay = Integer.valueOf(m.group(5)); + if (version == null && line.startsWith(MANIFEST_VERSION_LINE_START)) { + final Matcher matcher = VERSION_PATTERN.matcher(line); + if ( matcher.matches() ) { + versionMajor = Integer.valueOf(matcher.group(1)); + versionMinor = Integer.valueOf(matcher.group(2)); + versionYear = Integer.valueOf(matcher.group(3)); + versionMonth = Integer.valueOf(matcher.group(4)); + versionDay = Integer.valueOf(matcher.group(5)); + versionDecorator = matcher.group(6); version = versionMajor + "." + versionMinor + "." + versionYear + "" + versionMonth + "" + versionDay; } @@ -512,7 +517,7 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { } } - if (source == null && line.startsWith(README_SOURCE_LINE_START)) { + if (source == null && line.startsWith(MANIFEST_SOURCE_LINE_START)) { final Matcher m = SOURCE_PATTERN.matcher(line); if ( m.matches() ) { source = m.group(1); @@ -522,7 +527,7 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { } } - if (alternateSource == null && line.startsWith(README_ALT_SOURCE_LINE_START)) { + if (alternateSource == null && line.startsWith(MANIFEST_ALT_SOURCE_LINE_START)) { final Matcher m = ALT_SOURCE_PATTERN.matcher(line); if ( m.matches() ) { alternateSource = m.group(1); @@ -537,7 +542,7 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { // Make sure we have good info: if ( version == null ) { - logger.warn("Unable to read version information from data sources info/readme file: " + readmePath.toUri().toString()); + logger.warn("Unable to read version information from data sources info/readme file: " + manifestPath.toUri().toString()); } else { logger.info("Data sources version: " + version); @@ -547,25 +552,25 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { } if ( source == null ) { - logger.warn("Unable to read source information from data sources info/readme file: " + readmePath.toUri().toString()); + logger.warn("Unable to read source information from data sources info/readme file: " + manifestPath.toUri().toString()); } else { logger.info("Data sources source: " + source); } if ( alternateSource == null ) { - logger.warn("Unable to read alternate source information from data sources info/readme file: " + readmePath.toUri().toString()); + logger.warn("Unable to read alternate source information from data sources info/readme file: " + manifestPath.toUri().toString()); } else { logger.info("Data sources alternate source: " + alternateSource); } } catch (final Exception ex) { - logger.warn("Could not read " + README_FILE_NAME + ": unable to log data sources version information.", ex); + logger.warn("Could not read " + MANIFEST_FILE_NAME + ": unable to log data sources version information.", ex); } } else { - logger.warn("Could not read " + README_FILE_NAME + ": unable to log data sources version information."); + logger.warn("Could not read " + MANIFEST_FILE_NAME + ": unable to log data sources version information."); } // Warn the user if they need newer stuff. diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtilsUnitTest.java index e759f310394..2b9725a01fb 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtilsUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.funcotator.dataSources; +import org.apache.commons.lang.RandomStringUtils; import org.broadinstitute.hellbender.GATKBaseTest; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -7,6 +8,8 @@ import java.util.ArrayList; import java.util.Iterator; +import java.util.List; +import java.util.regex.Matcher; /** * Test class for {@link DataSourceUtils} @@ -23,11 +26,8 @@ public class DataSourceUtilsUnitTest extends GATKBaseTest { //================================================================================================================== // Helper Methods: - //================================================================================================================== - // Data Providers: + private List createBaseTestVersionData() { - @DataProvider - private Iterator provideForValidateVersionInformation() { final ArrayList testArgs = new ArrayList<>(); final ArrayList baseArgs = new ArrayList<>(); @@ -65,7 +65,44 @@ private Iterator provideForValidateVersionInformation() { // 1 Year after OK release date, but 1 month and 1 day before OK release date (should pass): testArgs.add( new Object[] { DataSourceUtils.MIN_MAJOR_VERSION_NUMBER, DataSourceUtils.MIN_MINOR_VERSION_NUMBER, DataSourceUtils.MIN_YEAR_RELEASED+1, DataSourceUtils.MIN_MONTH_RELEASED-1, DataSourceUtils.MIN_DAY_RELEASED-1, true } ); + return testArgs; + } + + //================================================================================================================== + // Data Providers: + + @DataProvider + private Iterator provideForValidateVersionInformation() { + return createBaseTestVersionData().iterator(); + } + + @DataProvider + private Iterator provideForTestVersionRegex() { + + final ArrayList testArgs = new ArrayList<>(); + + final List baseArgs = createBaseTestVersionData(); + + for ( final Object[] args : baseArgs ) { + for ( int whitespace = 0; whitespace < 2; ++whitespace ) { + for ( int decoratorCount = 0; decoratorCount < 10; ++decoratorCount ) { + + final String whitespaceString = whitespace != 0 ? "\t \t \t " : " "; + final String decoratorString = decoratorCount !=0 ? RandomStringUtils.randomAlphanumeric(decoratorCount) : ""; + + testArgs.add( + new Object[] { + args[0],args[1],args[2],args[3],args[4], + decoratorString, + whitespaceString + } + ); + } + } + } + return testArgs.iterator(); + } //================================================================================================================== @@ -88,4 +125,39 @@ public void testValidateVersionInformation(final Integer major, expected.booleanValue() ); } + @Test(dataProvider = "provideForTestVersionRegex") + public void testVersionRegex(final Integer major, + final Integer minor, + final Integer year, + final Integer month, + final Integer day, + final String decorator, + final String leadingWhitespace ) { + + // Construct the string: + final String versionString = String.format( + "%s%s%d.%d.%4d%02d%02d%s", + DataSourceUtils.MANIFEST_VERSION_LINE_START,leadingWhitespace, + major,minor,year,month,day,decorator + ); + + final Matcher matcher = DataSourceUtils.VERSION_PATTERN.matcher(versionString); + + Assert.assertTrue(matcher.matches()); + + final Integer versionMajor = Integer.valueOf(matcher.group(1)); + final Integer versionMinor = Integer.valueOf(matcher.group(2)); + final Integer versionYear = Integer.valueOf(matcher.group(3)); + final Integer versionMonth = Integer.valueOf(matcher.group(4)); + final Integer versionDay = Integer.valueOf(matcher.group(5)); + final String versionDecorator = matcher.group(6); + + Assert.assertEquals( versionMajor, major ); + Assert.assertEquals( versionMinor, minor ); + Assert.assertEquals( versionYear, year ); + Assert.assertEquals( versionMonth, month ); + Assert.assertEquals( versionDay, day ); + Assert.assertEquals( versionDecorator, decorator ); + } + } From 76662ef0c5b2083b22e388d80487f8c3650d131f Mon Sep 17 00:00:00 2001 From: Jonn Smith Date: Fri, 31 Aug 2018 12:41:50 -0400 Subject: [PATCH 2/2] Addressing @LeeTL1220 comments. --- .../tools/funcotator/dataSources/DataSourceUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java index c2075e6a243..518331db258 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java @@ -461,7 +461,7 @@ private static Properties readConfigFileProperties(final Path configFilePath) { * We assume the data sources path is OK in the case that the version information cannot be read because the * user can create their own data sources directory, which may not contain the metadata we seek. * - * NOTE: The README file in a Data Sources directory is assumed to have the following properties: + * NOTE: The MANIFEST file in a Data Sources directory is assumed to have the following properties: * - Its name must be {@link #MANIFEST_FILE_NAME} * - It must contain a line starting with {@link #MANIFEST_VERSION_LINE_START} containing an alphanumeric string containing the version number information. * - This version information takes the form of: