-
Notifications
You must be signed in to change notification settings - Fork 597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Updated datasource version parsing #5149
Merged
jonn-smith
merged 2 commits into
master
from
jts_funcotator_datasource_refactoring_4582_4692
Aug 31, 2018
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,12 +48,14 @@ private DataSourceUtils() {} | |
private static final PathMatcher configFileMatcher = | ||
FileSystems.getDefault().getPathMatcher("glob:**/*.config"); | ||
|
||
private static final String README_VERSION_LINE_START = "Version:"; | ||
private static final String README_SOURCE_LINE_START = "Source:"; | ||
private static final String README_ALT_SOURCE_LINE_START = "Alternate Source:"; | ||
private static final Pattern VERSION_PATTERN = Pattern.compile(README_VERSION_LINE_START + "\\s+(\\d+)\\.(\\d+)\\.(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)"); | ||
private static final Pattern SOURCE_PATTERN = Pattern.compile(README_SOURCE_LINE_START + "\\s+(ftp.*)"); | ||
private static final Pattern ALT_SOURCE_PATTERN = Pattern.compile(README_ALT_SOURCE_LINE_START + "\\s+(gs.*)"); | ||
@VisibleForTesting | ||
static final String MANIFEST_VERSION_LINE_START = "Version:"; | ||
private static final String MANIFEST_SOURCE_LINE_START = "Source:"; | ||
private static final String MANIFEST_ALT_SOURCE_LINE_START = "Alternate Source:"; | ||
@VisibleForTesting | ||
static final Pattern VERSION_PATTERN = Pattern.compile(MANIFEST_VERSION_LINE_START + "\\s+(\\d+)\\.(\\d+)\\.(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)(.*)"); | ||
private static final Pattern SOURCE_PATTERN = Pattern.compile(MANIFEST_SOURCE_LINE_START + "\\s+(ftp.*)"); | ||
private static final Pattern ALT_SOURCE_PATTERN = Pattern.compile(MANIFEST_ALT_SOURCE_LINE_START + "\\s+(gs.*)"); | ||
|
||
// Track our minimum version number here: | ||
@VisibleForTesting | ||
|
@@ -63,19 +65,18 @@ private DataSourceUtils() {} | |
@VisibleForTesting | ||
static final int MIN_YEAR_RELEASED = 2018; | ||
@VisibleForTesting | ||
static final int MIN_MONTH_RELEASED = 6; | ||
static final int MIN_MONTH_RELEASED = 8; | ||
@VisibleForTesting | ||
static final int MIN_DAY_RELEASED = 15; | ||
static final int MIN_DAY_RELEASED = 29; | ||
|
||
//================================================================================================================== | ||
// Public Static Members: | ||
|
||
/** The minimum version of the data sources required for funcotator to run. */ | ||
public static final String CURRENT_MINIMUM_DATA_SOURCE_VERSION = String.format("v%d.%d.%d%02d%02d", MIN_MAJOR_VERSION_NUMBER, MIN_MINOR_VERSION_NUMBER, MIN_YEAR_RELEASED, MIN_MONTH_RELEASED, MIN_DAY_RELEASED); | ||
public static final String README_FILE_NAME = "README.txt"; | ||
public static final String MANIFEST_FILE_NAME = "MANIFEST.txt"; | ||
public static final String DATA_SOURCES_FTP_PATH = "ftp://[email protected]/bundle/funcotator/"; | ||
public static final String DATA_SOURCES_BUCKET_PATH = "gs://broad-public-datasets/funcotator/"; | ||
|
||
public static final String CONFIG_FILE_FIELD_NAME_NAME = "name"; | ||
public static final String CONFIG_FILE_FIELD_NAME_VERSION = "version"; | ||
public static final String CONFIG_FILE_FIELD_NAME_SRC_FILE = "src_file"; | ||
|
@@ -100,7 +101,7 @@ private DataSourceUtils() {} | |
* @param dataSourceDirectories A {@link List} of {@link Path} to the directories containing our data sources. Must not be {@code null}. | ||
* @return The contents of the config files for each of the data sources found in the given {@code dataSourceDirectories}. | ||
*/ | ||
public static Map<Path, Properties> getAndValidateDataSourcesFromPaths(final String refVersion, | ||
public static Map<Path, Properties> getAndValidateDataSourcesFromPaths( final String refVersion, | ||
final List<String> dataSourceDirectories) { | ||
Utils.nonNull(refVersion); | ||
Utils.nonNull(dataSourceDirectories); | ||
|
@@ -115,21 +116,21 @@ public static Map<Path, Properties> getAndValidateDataSourcesFromPaths(final Str | |
|
||
logger.info("Initializing data sources from directory: " + pathString); | ||
|
||
final Path p = IOUtils.getPath(pathString); | ||
if ( !isValidDirectory(p) ) { | ||
throw new UserException("ERROR: Given data source path is not a valid directory: " + p.toUri().toString()); | ||
final Path pathToDatasources = IOUtils.getPath(pathString); | ||
if ( !isValidDirectory(pathToDatasources) ) { | ||
throw new UserException("ERROR: Given data source path is not a valid directory: " + pathToDatasources.toUri()); | ||
} | ||
|
||
// Log information from the datasources directory so we can have a record of what we're using: | ||
final boolean isGoodVersionOfDataSources = logDataSourcesInfo(p); | ||
final boolean isGoodVersionOfDataSources = logDataSourcesInfo(pathToDatasources); | ||
|
||
if ( !isGoodVersionOfDataSources ) { | ||
continue; | ||
} | ||
|
||
// Now that we have a valid directory, we need to grab a list of sub-directories in it: | ||
try { | ||
for ( final Path dataSourceTopDir : Files.list(p).filter(DataSourceUtils::isValidDirectory).collect(Collectors.toSet()) ) { | ||
for ( final Path dataSourceTopDir : Files.list(pathToDatasources).filter(DataSourceUtils::isValidDirectory).collect(Collectors.toSet()) ) { | ||
|
||
// Get the path that corresponds to our reference version: | ||
final Path dataSourceDir = dataSourceTopDir.resolve(refVersion); | ||
|
@@ -166,7 +167,7 @@ public static Map<Path, Properties> getAndValidateDataSourcesFromPaths(final Str | |
} | ||
} | ||
catch (final IOException ex) { | ||
throw new GATKException("Unable to read contents of: " + p.toUri().toString(), ex); | ||
throw new GATKException("Unable to read contents of: " + pathToDatasources.toUri().toString(), ex); | ||
} | ||
} | ||
|
||
|
@@ -460,13 +461,15 @@ private static Properties readConfigFileProperties(final Path configFilePath) { | |
* We assume the data sources path is OK in the case that the version information cannot be read because the | ||
* user can create their own data sources directory, which may not contain the metadata we seek. | ||
* | ||
* NOTE: The README file in a Data Sources directory is assumed to have the following properties: | ||
* - Its name must be {@link #README_FILE_NAME} | ||
* - It must contain a line starting with {@link #README_VERSION_LINE_START} containing an alphanumeric string containing the version number information. | ||
* NOTE: The MANIFEST file in a Data Sources directory is assumed to have the following properties: | ||
* - Its name must be {@link #MANIFEST_FILE_NAME} | ||
* - It must contain a line starting with {@link #MANIFEST_VERSION_LINE_START} containing an alphanumeric string containing the version number information. | ||
* - This version information takes the form of: | ||
* [MAJOR_VERSION].[MINOR_VERSION].[RELEASE_YEAR][RELEASE_MONTH][RELEASE_DAY] | ||
* [MAJOR_VERSION].[MINOR_VERSION].[RELEASE_YEAR][RELEASE_MONTH][RELEASE_DAY][VERSION_DECORATOR]? | ||
* e.g. | ||
* 1.1.20180204 (version 1.1 released Feb. 2, 2018) | ||
* 1.1.20180204 (version 1.1 released Feb. 4, 2018) | ||
* 4.2.20480608somatic (version 4.2 released June 8, 2048 - somatic data sources) | ||
* 1.7.20190918X (version 1.7 released Sept. 18, 2019 - X data sources) | ||
* | ||
* | ||
* @param dataSourcesPath {@link Path} to a Data Sources directory to check. | ||
|
@@ -476,34 +479,36 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { | |
|
||
boolean dataSourcesPathIsAcceptable = true; | ||
|
||
final Path readmePath = dataSourcesPath.resolve(IOUtils.getPath(README_FILE_NAME)); | ||
final Path manifestPath = dataSourcesPath.resolve(IOUtils.getPath(MANIFEST_FILE_NAME)); | ||
|
||
String version = null; | ||
|
||
if ( Files.exists(readmePath) && Files.isRegularFile(readmePath) && Files.isReadable(readmePath) ) { | ||
if ( Files.exists(manifestPath) && Files.isRegularFile(manifestPath) && Files.isReadable(manifestPath) ) { | ||
|
||
try ( final BufferedReader reader = Files.newBufferedReader(readmePath) ) { | ||
try ( final BufferedReader reader = Files.newBufferedReader(manifestPath) ) { | ||
|
||
Integer versionMajor = null; | ||
Integer versionMinor = null; | ||
Integer versionYear = null; | ||
Integer versionMonth = null; | ||
Integer versionDay = null; | ||
String source = null; | ||
String alternateSource = null; | ||
Integer versionMajor = null; | ||
Integer versionMinor = null; | ||
Integer versionYear = null; | ||
Integer versionMonth = null; | ||
Integer versionDay = null; | ||
String versionDecorator = null; | ||
String source = null; | ||
String alternateSource = null; | ||
|
||
// Get the info from our README file: | ||
String line = reader.readLine(); | ||
while ((line != null) && ((version == null) || (source == null) || (alternateSource == null))) { | ||
|
||
if (version == null && line.startsWith(README_VERSION_LINE_START)) { | ||
final Matcher m = VERSION_PATTERN.matcher(line); | ||
if ( m.matches() ) { | ||
versionMajor = Integer.valueOf(m.group(1)); | ||
versionMinor = Integer.valueOf(m.group(2)); | ||
versionYear = Integer.valueOf(m.group(3)); | ||
versionMonth = Integer.valueOf(m.group(4)); | ||
versionDay = Integer.valueOf(m.group(5)); | ||
if (version == null && line.startsWith(MANIFEST_VERSION_LINE_START)) { | ||
final Matcher matcher = VERSION_PATTERN.matcher(line); | ||
if ( matcher.matches() ) { | ||
versionMajor = Integer.valueOf(matcher.group(1)); | ||
versionMinor = Integer.valueOf(matcher.group(2)); | ||
versionYear = Integer.valueOf(matcher.group(3)); | ||
versionMonth = Integer.valueOf(matcher.group(4)); | ||
versionDay = Integer.valueOf(matcher.group(5)); | ||
versionDecorator = matcher.group(6); | ||
|
||
version = versionMajor + "." + versionMinor + "." + versionYear + "" + versionMonth + "" + versionDay; | ||
} | ||
|
@@ -512,7 +517,7 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { | |
} | ||
} | ||
|
||
if (source == null && line.startsWith(README_SOURCE_LINE_START)) { | ||
if (source == null && line.startsWith(MANIFEST_SOURCE_LINE_START)) { | ||
final Matcher m = SOURCE_PATTERN.matcher(line); | ||
if ( m.matches() ) { | ||
source = m.group(1); | ||
|
@@ -522,7 +527,7 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { | |
} | ||
} | ||
|
||
if (alternateSource == null && line.startsWith(README_ALT_SOURCE_LINE_START)) { | ||
if (alternateSource == null && line.startsWith(MANIFEST_ALT_SOURCE_LINE_START)) { | ||
final Matcher m = ALT_SOURCE_PATTERN.matcher(line); | ||
if ( m.matches() ) { | ||
alternateSource = m.group(1); | ||
|
@@ -537,7 +542,7 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { | |
|
||
// Make sure we have good info: | ||
if ( version == null ) { | ||
logger.warn("Unable to read version information from data sources info/readme file: " + readmePath.toUri().toString()); | ||
logger.warn("Unable to read version information from data sources info/readme file: " + manifestPath.toUri().toString()); | ||
} | ||
else { | ||
logger.info("Data sources version: " + version); | ||
|
@@ -547,25 +552,25 @@ private static boolean logDataSourcesInfo(final Path dataSourcesPath) { | |
} | ||
|
||
if ( source == null ) { | ||
logger.warn("Unable to read source information from data sources info/readme file: " + readmePath.toUri().toString()); | ||
logger.warn("Unable to read source information from data sources info/readme file: " + manifestPath.toUri().toString()); | ||
} | ||
else { | ||
logger.info("Data sources source: " + source); | ||
} | ||
|
||
if ( alternateSource == null ) { | ||
logger.warn("Unable to read alternate source information from data sources info/readme file: " + readmePath.toUri().toString()); | ||
logger.warn("Unable to read alternate source information from data sources info/readme file: " + manifestPath.toUri().toString()); | ||
} | ||
else { | ||
logger.info("Data sources alternate source: " + alternateSource); | ||
} | ||
} | ||
catch (final Exception ex) { | ||
logger.warn("Could not read " + README_FILE_NAME + ": unable to log data sources version information.", ex); | ||
logger.warn("Could not read " + MANIFEST_FILE_NAME + ": unable to log data sources version information.", ex); | ||
} | ||
} | ||
else { | ||
logger.warn("Could not read " + README_FILE_NAME + ": unable to log data sources version information."); | ||
logger.warn("Could not read " + MANIFEST_FILE_NAME + ": unable to log data sources version information."); | ||
} | ||
|
||
// Warn the user if they need newer stuff. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you just rename README.txt to MANIFEST.txt, or is this a more substantial change? I.e., is
MANIFEST.txt
now a file with a well-defined structured format, or is it still basically a textual README with another name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@droazen I added a
MANIFEST.txt
file to the packages.
MANIFEST.txt
contains the version and provenance information for each data source package, as well as a new decorator/information field to distinguish somatic and germline files. This version information is duplicated in the readme, but the readme is not parsed.
MANIFEST.txt
is a plain-text formatted file with 1 field per line. The formatting is loosely enforced with the regex in DataSourceUtils
and is of the syntax: <FIELD>: <VALUE>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, sounds good @jonn-smith, thanks for the clarification.