diff --git a/.travis.yml b/.travis.yml index a890f6a82..4de9c5b1c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,7 @@ env: - WHEELHOUSE="https://unidata-python.s3.amazonaws.com/wheelhouse/index.html" - WHEELDIR="wheelhouse/" - EXTRAS="test" + - EXTRA_PACKAGES="pydocstyle<4.0" - GH_PAGES_DIR="$HOME/gh-pages" matrix: - TASK="coverage" diff --git a/siphon/catalog.py b/siphon/catalog.py index fbc64ce4c..0662d7d3b 100644 --- a/siphon/catalog.py +++ b/siphon/catalog.py @@ -43,8 +43,9 @@ class DatasetCollection(IndexableMapping): default_regex = re.compile(r'(?P<year>\d{4})(?P<month>[01]\d)(?P<day>[0123]\d)_' r'(?P<hour>[012]\d)(?P<minute>[0-5]\d)') - def _get_datasets_with_times(self, regex): + def _get_datasets_with_times(self, regex, strptime=None): # Set the default regex if we don't have one + # If strptime is provided, pass the regex group named 'strptime' to strptime if regex is None: regex = self.default_regex else: @@ -59,11 +60,17 @@ def _get_datasets_with_times(self, regex): if match: found_date = True date_parts = match.groupdict() - dt = datetime(int(date_parts.get('year', 0)), int(date_parts.get('month', 0)), - int(date_parts.get('day', 0)), int(date_parts.get('hour', 0)), - int(date_parts.get('minute', 0)), - int(date_parts.get('second', 0)), - int(date_parts.get('microsecond', 0))) + if strptime is not None: + date_str = date_parts.get('strptime', 0) + dt = datetime.strptime(date_str, strptime) + else: + dt = datetime(int(date_parts.get('year', 0)), + int(date_parts.get('month', 0)), + int(date_parts.get('day', 0)), + int(date_parts.get('hour', 0)), + int(date_parts.get('minute', 0)), + int(date_parts.get('second', 0)), + int(date_parts.get('microsecond', 0))) yield dt, self[ds] # If we never found any keys that match, we should let the user know that rather @@ -71,8 +78,8 @@ def _get_datasets_with_times(self, regex): if not found_date: raise ValueError('No datasets with times found.') - def filter_time_nearest(self, time, regex=None): - """Filter keys for an item closest to the 
desired time. + def filter_time_nearest(self, time, regex=None, strptime=None): + r"""Filter keys for an item closest to the desired time. Loops over all keys in the collection and uses `regex` to extract and build `datetime`s. The collection of `datetime`s is compared to `start` and the value that @@ -86,21 +93,28 @@ def filter_time_nearest(self, time, regex=None): The desired time regex : str, optional The regular expression to use to extract date/time information from the key. If - given, this should contain named groups: 'year', 'month', 'day', 'hour', 'minute', - 'second', and 'microsecond', as appropriate. When a match is found, any of those - groups missing from the pattern will be assigned a value of 0. The default pattern - looks for patterns like: 20171118_2356. + given, this should contain either + 1. named groups: 'year', 'month', 'day', 'hour', 'minute', 'second', + and 'microsecond', as appropriate. When a match is found, any of those groups + missing from the pattern will be assigned a value of 0. The default pattern looks + for patterns like: 20171118_2356. + or + 2. a group named 'strptime' (e.g., r'_s(?P<strptime>\d{13})' for GOES-16 data) + to be parsed with strptime. + strptime : str, optional + the format string that corresponds to regex option (2) above. For example, GOES-16 + data with a julian date matching the regex above is parsed with '%Y%j%H%M%S'. Returns ------- The value with a time closest to that desired """ - return min(self._get_datasets_with_times(regex), + return min(self._get_datasets_with_times(regex, strptime), key=lambda i: abs((i[0] - time).total_seconds()))[-1] - def filter_time_range(self, start, end, regex=None): - """Filter keys for all items within the desired time range. + def filter_time_range(self, start, end, regex=None, strptime=None): + r"""Filter keys for all items within the desired time range. Loops over all keys in the collection and uses `regex` to extract and build `datetime`s. 
From the collection of `datetime`s, all values within `start` and `end` @@ -115,17 +129,24 @@ def filter_time_range(self, start, end, regex=None): The end of the desired time range, inclusive regex : str, optional The regular expression to use to extract date/time information from the key. If - given, this should contain named groups: 'year', 'month', 'day', 'hour', 'minute', - 'second', and 'microsecond', as appropriate. When a match is found, any of those - groups missing from the pattern will be assigned a value of 0. The default pattern - looks for patterns like: 20171118_2356. + given, this should contain either + 1. named groups: 'year', 'month', 'day', 'hour', 'minute', 'second', + and 'microsecond', as appropriate. When a match is found, any of those groups + missing from the pattern will be assigned a value of 0. The default pattern looks + for patterns like: 20171118_2356. + or + 2. a group named 'strptime' (e.g., r'_s(?P<strptime>\d{13})' for GOES-16 data) + to be parsed with strptime. + strptime : str, optional + the format string that corresponds to regex option (2) above. For example, GOES-16 + data with a julian date matching the regex above is parsed with '%Y%j%H%M%S'. 
Returns ------- All values corresponding to times within the specified range """ - return [item[-1] for item in self._get_datasets_with_times(regex) + return [item[-1] for item in self._get_datasets_with_times(regex, strptime) if start <= item[0] <= end] def __str__(self): diff --git a/siphon/tests/test_catalog.py b/siphon/tests/test_catalog.py index ca394a2d6..98af1f536 100644 --- a/siphon/tests/test_catalog.py +++ b/siphon/tests/test_catalog.py @@ -221,6 +221,45 @@ def test_datasets_time_range(): 'NAM_CONUS_20km_noaaport_20150529_0000.grib1'] +@recorder.use_cassette('top_level_20km_rap_catalog') +def test_datasets_time_range_regex(): + """Test getting datasets by time range using filenames, with manual regex.""" + # This is DatasetCollection.default_regex, but tests passing it explicitly + regex = (r'(?P<year>\d{4})(?P<month>[01]\d)(?P<day>[0123]\d)_' + r'(?P<hour>[012]\d)(?P<minute>[0-5]\d)') + url = ('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/NAM/' + 'CONUS_20km/noaaport/catalog.xml') + cat = TDSCatalog(url) + in_range = cat.catalog_refs.filter_time_range(datetime(2015, 5, 28, 0), + datetime(2015, 5, 29, 0), + regex=regex) + titles = [item.title for item in in_range] + assert titles == ['NAM_CONUS_20km_noaaport_20150528_0000.grib1', + 'NAM_CONUS_20km_noaaport_20150528_0600.grib1', + 'NAM_CONUS_20km_noaaport_20150528_1200.grib1', + 'NAM_CONUS_20km_noaaport_20150528_1800.grib1', + 'NAM_CONUS_20km_noaaport_20150529_0000.grib1'] + + +@recorder.use_cassette('top_level_20km_rap_catalog') +def test_datasets_time_range_strptime(): + """Test getting datasets by time range using filenames, with strptime.""" + regex = r'noaaport_(?P<strptime>\d{8}_\d{4})' + strptime = '%Y%m%d_%H%M' + url = ('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/NAM/' + 'CONUS_20km/noaaport/catalog.xml') + cat = TDSCatalog(url) + in_range = cat.catalog_refs.filter_time_range(datetime(2015, 5, 28, 0), + datetime(2015, 5, 29, 0), + regex=regex, strptime=strptime) + titles = [item.title for item in in_range] + assert titles == 
['NAM_CONUS_20km_noaaport_20150528_0000.grib1', + 'NAM_CONUS_20km_noaaport_20150528_0600.grib1', + 'NAM_CONUS_20km_noaaport_20150528_1200.grib1', + 'NAM_CONUS_20km_noaaport_20150528_1800.grib1', + 'NAM_CONUS_20km_noaaport_20150529_0000.grib1'] + + @recorder.use_cassette('top_level_20km_rap_catalog') def test_datasets_time_range_raises(): """Test getting datasets by time range using filenames."""