diff --git a/.stickler.yml b/.stickler.yml
index 8745306..a0331aa 100644
--- a/.stickler.yml
+++ b/.stickler.yml
@@ -1,3 +1,4 @@
 linters:
   flake8:
     fixer: true
+    python: 3
diff --git a/aospy/automate.py b/aospy/automate.py
index 2259941..7fe604f 100644
--- a/aospy/automate.py
+++ b/aospy/automate.py
@@ -78,23 +78,9 @@ def _merge_dicts(*dict_args):
     return result


-def _input_func_py2_py3():
-    """Find function for reading user input that works on Python 2 and 3.
-
-    See e.g. http://stackoverflow.com/questions/21731043
-    """
-    try:
-        input = raw_input
-    except NameError:
-        import builtins
-        input = builtins.input
-    return input
-
-
-def _user_verify(input_func=_input_func_py2_py3(),
-                 prompt='Perform these computations? [y/n] '):
+def _user_verify(prompt='Perform these computations? [y/n] '):
     """Prompt the user for verification."""
-    if not input_func(prompt).lower()[0] == 'y':
+    if not input(prompt).lower()[0] == 'y':
         raise AospyException('Execution cancelled by user.')
diff --git a/aospy/data_loader.py b/aospy/data_loader.py
index 9d517a4..37bcea1 100644
--- a/aospy/data_loader.py
+++ b/aospy/data_loader.py
@@ -2,7 +2,6 @@
 import logging
 import os
 import pprint
-import warnings

 import numpy as np
 import xarray as xr
@@ -11,7 +10,6 @@
     ETA_STR,
     GRID_ATTRS,
     TIME_STR,
-    TIME_BOUNDS_STR,
 )
 from .utils import times, io
@@ -112,6 +110,7 @@ def set_grid_attrs_as_coords(ds):
     -------
     Dataset
         Dataset with grid attributes set as coordinates
+
     """
    grid_attrs_in_ds = set(GRID_ATTRS.keys()).intersection(
        set(ds.coords) | set(ds.data_vars))
@@ -130,6 +129,7 @@ def _maybe_cast_to_float64(da):
     Returns
     -------
     DataArray
+
     """
     if da.dtype == np.float32:
         logging.warning('Datapoints were stored using the np.float32 datatype.'
@@ -162,6 +162,7 @@ def _sel_var(ds, var, upcast_float32=True):
     ------
     KeyError
         If the variable is not in the Dataset
+
     """
     for name in var.names:
         try:
@@ -176,46 +177,6 @@ def _sel_var(ds, var, upcast_float32=True):
             raise LookupError(msg)


-def _prep_time_data(ds):
-    """Prepare time coordinate information in Dataset for use in aospy.
-
-    1. If the Dataset contains a time bounds coordinate, add attributes
-    representing the true beginning and end dates of the time interval used
-    to construct the Dataset
-    2. If the Dataset contains a time bounds coordinate, overwrite the time
-    coordinate values with the averages of the time bounds at each timestep
-    3. Decode the times into np.datetime64 objects for time indexing
-
-    Parameters
-    ----------
-    ds : Dataset
-        Pre-processed Dataset with time coordinate renamed to
-        internal_names.TIME_STR
-
-    Returns
-    -------
-    Dataset
-        The processed Dataset
-    """
-    ds = times.ensure_time_as_index(ds)
-    if TIME_BOUNDS_STR in ds:
-        ds = times.ensure_time_avg_has_cf_metadata(ds)
-        ds[TIME_STR] = times.average_time_bounds(ds)
-    else:
-        logging.warning("dt array not found. Assuming equally spaced "
-                        "values in time, even though this may not be "
-                        "the case")
-        ds = times.add_uniform_time_weights(ds)
-    # Suppress enable_cftimeindex is a no-op warning; we'll keep setting it for
-    # now to maintain backwards compatibility for older xarray versions.
-    with warnings.catch_warnings():
-        warnings.filterwarnings('ignore')
-        with xr.set_options(enable_cftimeindex=True):
-            ds = xr.decode_cf(ds, decode_times=True, decode_coords=False,
-                              mask_and_scale=True)
-    return ds
-
-
 def _load_data_from_disk(file_set, preprocess_func=lambda ds: ds,
                          data_vars='minimal', coords='minimal',
                          grid_attrs=None, **kwargs):
@@ -243,14 +204,21 @@ def _load_data_from_disk(file_set, preprocess_func=lambda ds: ds,
     Returns
     -------
     Dataset
+
     """
     apply_preload_user_commands(file_set)
     func = _preprocess_and_rename_grid_attrs(preprocess_func, grid_attrs,
                                              **kwargs)
-    return xr.open_mfdataset(file_set, preprocess=func, concat_dim=TIME_STR,
-                             decode_times=False, decode_coords=False,
-                             mask_and_scale=True, data_vars=data_vars,
-                             coords=coords)
+    return xr.open_mfdataset(
+        file_set,
+        preprocess=func,
+        combine='by_coords',
+        decode_times=False,
+        decode_coords=False,
+        mask_and_scale=True,
+        data_vars=data_vars,
+        coords=coords,
+    )


 def apply_preload_user_commands(file_set, cmd=io.dmget):
@@ -259,6 +227,7 @@ def apply_preload_user_commands(file_set, cmd=io.dmget):
     For example, on the NOAA Geophysical Fluid Dynamics Laboratory
     computational cluster, data that is saved on their tape archive must be
     accessed via a `dmget` (or `hsmget`) command before being used.
+
     """
     if cmd is not None:
         cmd(file_set)
@@ -301,16 +270,27 @@ def load_variable(self, var=None, start_date=None, end_date=None,
         -------
         da : DataArray
             DataArray for the specified variable, date range, and interval in
+
         """
-        file_set = self._generate_file_set(var=var, start_date=start_date,
-                                           end_date=end_date, **DataAttrs)
+        file_set = self._generate_file_set(
+            var=var,
+            start_date=start_date,
+            end_date=end_date,
+            **DataAttrs,
+        )
         ds = _load_data_from_disk(
-            file_set, self.preprocess_func, data_vars=self.data_vars,
-            coords=self.coords, start_date=start_date, end_date=end_date,
-            time_offset=time_offset, grid_attrs=grid_attrs, **DataAttrs
+            file_set,
+            self.preprocess_func,
+            data_vars=self.data_vars,
+            coords=self.coords,
+            start_date=start_date,
+            end_date=end_date,
+            time_offset=time_offset,
+            grid_attrs=grid_attrs,
+            **DataAttrs,
         )
         if var.def_time:
-            ds = _prep_time_data(ds)
+            ds = times.prep_time_data(ds)
             start_date = times.maybe_convert_to_index_date_type(
                 ds.indexes[TIME_STR], start_date)
             end_date = times.maybe_convert_to_index_date_type(
@@ -330,6 +310,7 @@ def _load_or_get_from_model(self, var, start_date=None, end_date=None,
         Supports both access of grid attributes either through the DataLoader
         or through an optionally-provided Model object.  Defaults to using
         the version found in the DataLoader first.
+ """ grid_attrs = None if model is None else model.grid_attrs diff --git a/aospy/model.py b/aospy/model.py index 7ea3d4a..a58bb9f 100644 --- a/aospy/model.py +++ b/aospy/model.py @@ -228,7 +228,8 @@ def _get_grid_files(self): try: ds = xr.open_dataset(path, decode_times=False) except (TypeError, AttributeError): - ds = xr.open_mfdataset(path, decode_times=False).load() + ds = xr.open_mfdataset(path, decode_times=False, + combine='by_coords').load() except (RuntimeError, OSError) as e: msg = str(e) + ': {}'.format(path) raise RuntimeError(msg) diff --git a/aospy/test/conftest.py b/aospy/test/conftest.py new file mode 100644 index 0000000..a06a57c --- /dev/null +++ b/aospy/test/conftest.py @@ -0,0 +1,89 @@ +"""pytest conftest.py file for sharing fixtures across modules.""" +import datetime + +from cftime import DatetimeNoLeap +import numpy as np +import pytest +import xarray as xr + +from aospy.internal_names import ( + LON_STR, + TIME_STR, + TIME_BOUNDS_STR, + BOUNDS_STR, +) + + +_DATE_RANGES = { + 'datetime': (datetime.datetime(2000, 1, 1), + datetime.datetime(2002, 12, 31)), + 'datetime64': (np.datetime64('2000-01-01'), + np.datetime64('2002-12-31')), + 'cftime': (DatetimeNoLeap(2000, 1, 1), + DatetimeNoLeap(2002, 12, 31)), + 'str': ('2000', '2002') +} + + +@pytest.fixture() +def alt_lat_str(): + return 'LATITUDE' + + +@pytest.fixture() +def var_name(): + return 'a' + + +@pytest.fixture +def ds_time_encoded_cf(): + time_bounds = np.array([[0, 31], [31, 59], [59, 90]]) + bounds = np.array([0, 1]) + time = np.array([15, 46, 74]) + data = np.zeros((3)) + ds = xr.DataArray(data, + coords=[time], + dims=[TIME_STR], + name='a').to_dataset() + ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds, + coords=[time, bounds], + dims=[TIME_STR, BOUNDS_STR], + name=TIME_BOUNDS_STR) + units_str = 'days since 2000-01-01 00:00:00' + cal_str = 'noleap' + ds[TIME_STR].attrs['units'] = units_str + ds[TIME_STR].attrs['calendar'] = cal_str + return ds + + +@pytest.fixture() +def ds_with_time_bounds(ds_time_encoded_cf, alt_lat_str, var_name): + time = ds_time_encoded_cf[TIME_STR] + data = np.zeros((3, 1, 1)) + lat = [0] + lon = [0] + + ds = xr.DataArray( + data, + coords=[time, lat, lon], + dims=[TIME_STR, alt_lat_str, LON_STR], + name=var_name, + ).to_dataset() + ds[TIME_BOUNDS_STR] = ds_time_encoded_cf[TIME_BOUNDS_STR] + return ds + + +@pytest.fixture() +def ds_inst(ds_with_time_bounds): + inst_time = np.array([3, 6, 9]) + inst_units_str = 'hours since 2000-01-01 00:00:00' + ds = ds_with_time_bounds.drop(labels=[BOUNDS_STR, TIME_BOUNDS_STR]) + ds[TIME_STR].values = inst_time + ds[TIME_STR].attrs['units'] = inst_units_str + ds[TIME_STR].attrs['calendar'] = 'noleap' + return ds + + +@pytest.fixture() +def ds_no_time(ds_with_time_bounds): + return ds_with_time_bounds.drop(TIME_STR) diff --git a/aospy/test/test_automate.py b/aospy/test/test_automate.py index 05b5d88..631c02a 100644 --- a/aospy/test/test_automate.py +++ b/aospy/test/test_automate.py @@ -1,26 +1,46 @@ from multiprocessing import cpu_count from os.path import isfile import shutil -import sys import itertools +from unittest import mock import distributed import pytest from aospy import Var, Proj -from aospy.automate import (_get_attr_by_tag, _permuted_dicts_of_specs, - _get_all_objs_of_type, _merge_dicts, - _input_func_py2_py3, AospyException, - _user_verify, CalcSuite, _MODELS_STR, _RUNS_STR, - _VARIABLES_STR, _REGIONS_STR, - _compute_or_skip_on_error, submit_mult_calcs, - _n_workers_for_local_cluster, - _prune_invalid_time_reductions) +from 
+    _user_verify,
+    _MODELS_STR,
+    _RUNS_STR,
+    _VARIABLES_STR,
+    _REGIONS_STR,
+    _compute_or_skip_on_error,
+    _get_all_objs_of_type,
+    _get_attr_by_tag,
+    _merge_dicts,
+    _n_workers_for_local_cluster,
+    _permuted_dicts_of_specs,
+    _prune_invalid_time_reductions,
+    AospyException,
+    CalcSuite,
+    submit_mult_calcs,
+)
 from .data.objects import examples as lib
 from .data.objects.examples import (
-    example_proj, example_model, example_run, var_not_time_defined,
-    condensation_rain, convection_rain, precip, ps, sphum, globe, sahel, bk,
-    p, dp
+    example_proj,
+    example_model,
+    example_run,
+    var_not_time_defined,
+    condensation_rain,
+    convection_rain,
+    precip,
+    ps,
+    sphum,
+    globe,
+    sahel,
+    bk,
+    p,
+    dp,
 )
@@ -128,19 +148,12 @@ def test_merge_dicts():
     assert expected == _merge_dicts(dict1, dict2, dict3, dict4)


-def test_input_func_py2_py3():
-    result = _input_func_py2_py3()
-    if sys.version.startswith('3'):
-        import builtins
-        assert result is builtins.input
-    elif sys.version.startswith('2'):
-        assert result is raw_input  # noqa: F821
-
-
 def test_user_verify():
+    with mock.patch('builtins.input', return_value='YES'):
+        _user_verify()
     with pytest.raises(AospyException):
-        _user_verify(lambda x: 'no')
-    _user_verify(lambda x: 'YES')
+        with mock.patch('builtins.input', return_value='no'):
+            _user_verify()


 @pytest.mark.parametrize(
@@ -235,8 +248,7 @@ def assert_calc_files_exist(calcs, write_to_tar, dtypes_out_time):
             assert not isfile(calc.path_tar_out)


-@pytest.mark.skipif(sys.version.startswith('2'),
-                    reason='https://github.com/spencerahill/aospy/issues/259')
+@pytest.mark.filterwarnings('ignore:Using or importing the ABCs from')
 @pytest.mark.parametrize(
     ('exec_options'),
     [dict(parallelize=True, write_to_tar=False),
      dict(parallelize=True, write_to_tar=True)])
 def test_submit_mult_calcs_external_client(calcsuite_init_specs_single_calc,
@@ -251,8 +263,6 @@ def test_submit_mult_calcs_external_client(calcsuite_init_specs_single_calc,
                                            external_client):
     submit_mult_calcs(calcsuite_init_specs_single_calc)
     assert_calc_files_exist(
         calcsuite_init_specs_single_calc['output_time_regional_reductions'])


-@pytest.mark.skipif(sys.version.startswith('2'),
-                    reason='https://github.com/spencerahill/aospy/issues/259')
 @pytest.mark.parametrize(
     ('exec_options'),
     [dict(parallelize=False, write_to_tar=False),
      dict(parallelize=False, write_to_tar=True)])
@@ -278,8 +288,6 @@ def test_submit_mult_calcs_no_calcs(calcsuite_init_specs):
         submit_mult_calcs(specs)


-@pytest.mark.skipif(sys.version.startswith('2'),
-                    reason='https://github.com/spencerahill/aospy/issues/259')
 @pytest.mark.parametrize(
     ('exec_options'),
     [dict(parallelize=True, write_to_tar=False),
      dict(parallelize=True, write_to_tar=True)])
 def test_submit_two_calcs_external_client(calcsuite_init_specs_two_calcs,
@@ -294,8 +302,6 @@ def test_submit_two_calcs_external_client(calcsuite_init_specs_two_calcs,
                                           external_client):
     submit_mult_calcs(calcsuite_init_specs_two_calcs)
     assert_calc_files_exist(
         calcsuite_init_specs_two_calcs['output_time_regional_reductions'])


-@pytest.mark.skipif(sys.version.startswith('2'),
-                    reason='https://github.com/spencerahill/aospy/issues/259')
 @pytest.mark.parametrize(
     ('exec_options'),
     [dict(parallelize=False, write_to_tar=False),
diff --git a/aospy/test/test_calc_basic.py b/aospy/test/test_calc_basic.py
index 24d2134..d7bc80d 100755
--- a/aospy/test/test_calc_basic.py
+++ b/aospy/test/test_calc_basic.py
@@ -21,22 +21,20 @@
 )


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def _test_output_attrs(calc, dtype_out):
-    with xr.set_options(enable_cftimeindex=True):
-        with xr.open_dataset(calc.path_out[dtype_out]) as data:
-            expected_units = calc.var.units
-            if calc.dtype_out_vert == 'vert_int':
-                if expected_units != '':
-                    expected_units = ("(vertical integral of {0}):"
-                                      " {0} m)").format(expected_units)
-                else:
-                    expected_units = ("(vertical integral of quantity"
-                                      " with unspecified units)")
-            expected_description = calc.var.description
-            for name, arr in data.data_vars.items():
-                assert expected_units == arr.attrs['units']
-                assert expected_description == arr.attrs['description']
+    with xr.open_dataset(calc.path_out[dtype_out]) as data:
+        expected_units = calc.var.units
+        if calc.dtype_out_vert == 'vert_int':
+            if expected_units != '':
+                expected_units = ("(vertical integral of {0}):"
+                                  " {0} m)").format(expected_units)
+            else:
+                expected_units = ("(vertical integral of quantity"
+                                  " with unspecified units)")
+        expected_description = calc.var.description
+        for name, arr in data.data_vars.items():
+            assert expected_units == arr.attrs['units']
+            assert expected_description == arr.attrs['description']


 def _clean_test_direcs():
@@ -107,49 +105,42 @@ def test_params(request):
     _clean_test_direcs()


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_annual_mean(test_params):
     calc = Calc(intvl_out='ann', dtype_out_time='av', **test_params)
     calc.compute()
     _test_files_and_attrs(calc, 'av')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_annual_ts(test_params):
     calc = Calc(intvl_out='ann', dtype_out_time='ts', **test_params)
     calc.compute()
     _test_files_and_attrs(calc, 'ts')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_seasonal_mean(test_params):
     calc = Calc(intvl_out='djf', dtype_out_time='av', **test_params)
     calc.compute()
     _test_files_and_attrs(calc, 'av')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_seasonal_ts(test_params):
     calc = Calc(intvl_out='djf', dtype_out_time='ts', **test_params)
     calc.compute()
     _test_files_and_attrs(calc, 'ts')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_monthly_mean(test_params):
     calc = Calc(intvl_out=1, dtype_out_time='av', **test_params)
     calc.compute()
     _test_files_and_attrs(calc, 'av')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_monthly_ts(test_params):
     calc = Calc(intvl_out=1, dtype_out_time='ts', **test_params)
     calc.compute()
     _test_files_and_attrs(calc, 'ts')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_simple_reg_av(test_params):
     calc = Calc(intvl_out='ann', dtype_out_time='reg.av', region=[globe],
                 **test_params)
@@ -157,7 +148,6 @@ def test_simple_reg_av(test_params):
     _test_files_and_attrs(calc, 'reg.av')


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_simple_reg_ts(test_params):
     calc = Calc(intvl_out='ann', dtype_out_time='reg.ts', region=[globe],
                 **test_params)
@@ -166,7 +156,6 @@ def test_simple_reg_ts(test_params):


 @pytest.mark.filterwarnings('ignore:Mean of empty slice')
-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_complex_reg_av(test_params):
     calc = Calc(intvl_out='ann', dtype_out_time='reg.av', region=[sahel],
                 **test_params)
@@ -272,24 +261,19 @@ def recursive_test_params():
     _clean_test_direcs()


-# Remove autoclose argument in xarray version 0.12
-@pytest.mark.filterwarnings('ignore:The autoclose')
-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 def test_recursive_calculation(recursive_test_params):
     basic_params, recursive_params = recursive_test_params
     calc = Calc(intvl_out='ann', dtype_out_time='av', **basic_params)
     calc = calc.compute()
-    with xr.set_options(enable_cftimeindex=True):
-        expected = xr.open_dataset(
-            calc.path_out['av'], autoclose=True)['condensation_rain']
+    expected = xr.open_dataset(
+        calc.path_out['av'])['condensation_rain']
     _test_files_and_attrs(calc, 'av')

     calc = Calc(intvl_out='ann', dtype_out_time='av',
                 **recursive_params)
     calc = calc.compute()
-    with xr.set_options(enable_cftimeindex=True):
-        result = xr.open_dataset(
-            calc.path_out['av'], autoclose=True)['recursive_condensation_rain']
+    result = xr.open_dataset(
+        calc.path_out['av'])['recursive_condensation_rain']
     _test_files_and_attrs(calc, 'av')

     xr.testing.assert_equal(expected, result)
diff --git a/aospy/test/test_data_loader.py b/aospy/test/test_data_loader.py
index 8b29bf6..f4c9f37 100644
--- a/aospy/test/test_data_loader.py
+++ b/aospy/test/test_data_loader.py
@@ -5,30 +5,45 @@
 import unittest
 import warnings

+from cftime import DatetimeNoLeap
 import numpy as np
 import pytest
 import xarray as xr

-from cftime import DatetimeNoLeap
-
 from aospy import Var
-from aospy.data_loader import (DataLoader, DictDataLoader, GFDLDataLoader,
-                               NestedDictDataLoader, grid_attrs_to_aospy_names,
-                               set_grid_attrs_as_coords, _sel_var,
-                               _prep_time_data,
-                               _preprocess_and_rename_grid_attrs,
-                               _maybe_cast_to_float64)
-from aospy.internal_names import (LAT_STR, LON_STR, TIME_STR, TIME_BOUNDS_STR,
-                                  BOUNDS_STR, SFC_AREA_STR, ETA_STR, PHALF_STR,
-                                  TIME_WEIGHTS_STR, GRID_ATTRS, ZSURF_STR)
+from aospy.data_loader import (
+    DataLoader,
+    DictDataLoader,
+    GFDLDataLoader,
+    NestedDictDataLoader,
+    grid_attrs_to_aospy_names,
+    set_grid_attrs_as_coords,
+    _sel_var,
+    _preprocess_and_rename_grid_attrs,
+    _maybe_cast_to_float64,
+)
+from aospy.internal_names import (
+    LAT_STR,
+    LON_STR,
+    TIME_STR,
+    TIME_BOUNDS_STR,
+    BOUNDS_STR,
+    SFC_AREA_STR,
+    ETA_STR,
+    PHALF_STR,
+    GRID_ATTRS,
+    ZSURF_STR,
+)
 from aospy.utils import io
-from .data.objects.examples import (condensation_rain, convection_rain, precip,
-                                    file_map, ROOT_PATH, example_model, bk)
-
-
-def _open_ds_catch_warnings(path):
-    with warnings.catch_warnings(record=True):
-        return xr.open_dataset(path)
+from .data.objects.examples import (
+    condensation_rain,
+    convection_rain,
+    precip,
+    file_map,
+    ROOT_PATH,
+    example_model,
+    bk,
+)


 @pytest.mark.parametrize(
@@ -72,7 +87,7 @@ def var_name():


 @pytest.fixture()
-def ds(alt_lat_str, var_name):
+def ds_with_time_bounds(alt_lat_str, var_name):
     time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
     bounds = np.array([0, 1])
     time = np.array([15, 46, 74])
@@ -94,14 +109,14 @@


 @pytest.fixture()
-def inst_ds(ds):
+def ds_inst(ds_with_time_bounds):
     inst_time = np.array([3, 6, 9])
     inst_units_str = 'hours since 2000-01-01 00:00:00'
-    inst_ds = ds.copy()
-    inst_ds.drop(TIME_BOUNDS_STR)
-    inst_ds[TIME_STR].values = inst_time
-    inst_ds[TIME_STR].attrs['units'] = inst_units_str
-    return inst_ds
+    ds_inst = ds_with_time_bounds.copy()
+    ds_inst.drop(TIME_BOUNDS_STR)
+    ds_inst[TIME_STR].values = inst_time
+    ds_inst[TIME_STR].attrs['units'] = inst_units_str
+    return ds_inst


 def _gfdl_data_loader_kwargs(data_start_date, data_end_date):
@@ -152,20 +167,19 @@ def gfdl_data_loader(request):
     return data_loader_type(**kwargs)


-def test_rename_grid_attrs_ds(ds, alt_lat_str):
-    assert LAT_STR not in ds
-    assert alt_lat_str in ds
-    ds = grid_attrs_to_aospy_names(ds)
+def test_rename_grid_attrs_ds(ds_with_time_bounds, alt_lat_str):
+    assert LAT_STR not in ds_with_time_bounds
+    assert alt_lat_str in ds_with_time_bounds
+    ds = grid_attrs_to_aospy_names(ds_with_time_bounds)
     assert LAT_STR in ds


-def test_rename_grid_attrs_dim_no_coord(ds, var_name):
+def test_rename_grid_attrs_dim_no_coord(ds_with_time_bounds, var_name):
     bounds_dim = 'nv'
-    assert bounds_dim not in ds
+    assert bounds_dim not in ds_with_time_bounds
     assert bounds_dim in GRID_ATTRS[BOUNDS_STR]
-    # Create DataArray with all dims lacking coords
-    values = ds[var_name].values
-    arr = xr.DataArray(values, name='dummy')
+    # Create DataArray with all dims lacking coords.
+    arr = xr.DataArray(ds_with_time_bounds[var_name].values, name='dummy')
     # Insert name to be replaced (its physical meaning doesn't matter here)
     ds = arr.rename({'dim_0': bounds_dim}).to_dataset()
     assert not ds[bounds_dim].coords
@@ -173,42 +187,42 @@ def test_rename_grid_attrs_dim_no_coord(ds, var_name):
     assert not result[BOUNDS_STR].coords


-def test_rename_grid_attrs_skip_scalar_dim(ds):
+def test_rename_grid_attrs_skip_scalar_dim(ds_with_time_bounds):
     phalf_dim = 'phalf'
-    assert phalf_dim not in ds
+    assert phalf_dim not in ds_with_time_bounds
     assert phalf_dim in GRID_ATTRS[PHALF_STR]
-    ds_copy = ds.copy()
+    ds_copy = ds_with_time_bounds.copy()
     ds_copy[phalf_dim] = 4
     ds_copy = ds_copy.set_coords(phalf_dim)
     result = grid_attrs_to_aospy_names(ds_copy)
     xr.testing.assert_identical(result[phalf_dim], ds_copy[phalf_dim])


-def test_rename_grid_attrs_copy_attrs(ds, alt_lat_str):
+def test_rename_grid_attrs_copy_attrs(ds_with_time_bounds, alt_lat_str):
     orig_attrs = {'dummy_key': 'dummy_val'}
-    ds_orig = ds.copy()
+    ds_orig = ds_with_time_bounds.copy()
     ds_orig[alt_lat_str].attrs = orig_attrs
     ds = grid_attrs_to_aospy_names(ds_orig)
     assert ds[LAT_STR].attrs == orig_attrs


-def test_rename_grid_attrs_custom(ds, alt_lat_str):
-    assert LAT_STR not in ds
-    ds = ds.rename({alt_lat_str: 'custom_lat_name'})
+def test_rename_grid_attrs_custom(ds_with_time_bounds, alt_lat_str):
+    assert LAT_STR not in ds_with_time_bounds
+    ds = ds_with_time_bounds.rename({alt_lat_str: 'custom_lat_name'})
     ds = grid_attrs_to_aospy_names(ds, {LAT_STR: 'custom_lat_name'})
     assert LAT_STR in ds
     assert 'custom_lat_name' not in ds


-def test_rename_grid_attrs_custom_error(ds, alt_lat_str):
-    assert LAT_STR not in ds
-    ds = ds.rename({alt_lat_str: 'custom_lat_name'})
+def test_rename_grid_attrs_custom_error(ds_with_time_bounds, alt_lat_str):
+    assert LAT_STR not in ds_with_time_bounds
+    ds = ds_with_time_bounds.rename({alt_lat_str: 'custom_lat_name'})
     with pytest.raises(ValueError):
         ds = grid_attrs_to_aospy_names(ds, {alt_lat_str: 'custom_lat_name'})


-def test_set_grid_attrs_as_coords(ds, var_name):
-    ds = grid_attrs_to_aospy_names(ds)
+def test_set_grid_attrs_as_coords(ds_with_time_bounds, var_name):
+    ds = grid_attrs_to_aospy_names(ds_with_time_bounds)
     sfc_area = ds[var_name].isel(**{TIME_STR: 0}).drop(TIME_STR)
     ds[SFC_AREA_STR] = sfc_area
@@ -238,9 +252,9 @@ def test_sel_var():
         _sel_var(ds, precip)


-def test_maybe_apply_time_shift(data_loader, ds, inst_ds, var_name,
-                                generate_file_set_args):
-    ds = xr.decode_cf(ds)
+def test_maybe_apply_time_shift(data_loader, ds_with_time_bounds, ds_inst,
+                                var_name, generate_file_set_args):
+    ds = xr.decode_cf(ds_with_time_bounds)
     da = ds[var_name]

     result = data_loader._maybe_apply_time_shift(
@@ -257,21 +271,21 @@ def test_maybe_apply_time_shift(data_loader, ds, inst_ds, var_name,
     assert result.identical(expected)


-def test_maybe_apply_time_shift_ts(gfdl_data_loader, ds, var_name,
-                                   generate_file_set_args):
-    ds = xr.decode_cf(ds)
+def test_maybe_apply_time_shift_ts(gfdl_data_loader, ds_with_time_bounds,
+                                   var_name, generate_file_set_args):
+    ds = xr.decode_cf(ds_with_time_bounds)
     da = ds[var_name]
     result = gfdl_data_loader._maybe_apply_time_shift(
         da.copy(), **generate_file_set_args)[TIME_STR]
     assert result.identical(da[TIME_STR])


-def test_maybe_apply_time_shift_inst(gfdl_data_loader, inst_ds, var_name,
+def test_maybe_apply_time_shift_inst(gfdl_data_loader, ds_inst, var_name,
                                      generate_file_set_args):
-    inst_ds = xr.decode_cf(inst_ds)
+    ds_inst = xr.decode_cf(ds_inst)
     generate_file_set_args['dtype_in_time'] = 'inst'
     generate_file_set_args['intvl_in'] = '3hr'
-    da = inst_ds[var_name]
+    da = ds_inst[var_name]

     result = gfdl_data_loader._maybe_apply_time_shift(
         da.copy(), **generate_file_set_args)[TIME_STR]
@@ -280,7 +294,7 @@ def test_maybe_apply_time_shift_inst(gfdl_data_loader, inst_ds, var_name,
     assert result.identical(expected)

     generate_file_set_args['intvl_in'] = 'daily'
-    da = inst_ds[var_name]
+    da = ds_inst[var_name]

     result = gfdl_data_loader._maybe_apply_time_shift(
         da.copy(), **generate_file_set_args)[TIME_STR]
@@ -289,13 +303,7 @@ def test_maybe_apply_time_shift_inst(gfdl_data_loader, inst_ds, var_name,
     assert result.identical(expected)


-def test_prep_time_data(inst_ds):
-    assert (TIME_WEIGHTS_STR not in inst_ds)
-    ds = _prep_time_data(inst_ds)
-    assert (TIME_WEIGHTS_STR in ds)
-
-
-def test_preprocess_and_rename_grid_attrs(ds, alt_lat_str):
+def test_preprocess_and_rename_grid_attrs(ds_with_time_bounds, alt_lat_str):
     def preprocess_func(ds, **kwargs):
         # Corrupt a grid attribute name so that we test
         # that grid_attrs_to_aospy_names is still called
@@ -304,14 +312,15 @@ def preprocess_func(ds, **kwargs):
         ds.attrs['a'] = 'b'
         return ds

-    assert LAT_STR not in ds
-    assert alt_lat_str in ds
-    assert LON_STR in ds
+    assert LAT_STR not in ds_with_time_bounds
+    assert alt_lat_str in ds_with_time_bounds
+    assert LON_STR in ds_with_time_bounds

-    expected = ds.rename({alt_lat_str: LAT_STR})
+    expected = ds_with_time_bounds.rename({alt_lat_str: LAT_STR})
     expected = expected.set_coords(TIME_BOUNDS_STR)
     expected.attrs['a'] = 'b'
-    result = _preprocess_and_rename_grid_attrs(preprocess_func)(ds)
+    result = _preprocess_and_rename_grid_attrs(preprocess_func)(
+        ds_with_time_bounds)
     xr.testing.assert_identical(result, expected)
@@ -347,7 +356,7 @@ def test_generate_file_set(data_loader, generate_file_set_args):
         data_loader._generate_file_set(**generate_file_set_args)


-def test_overriding_constructor(gfdl_data_loader, ds):
+def test_overriding_constructor(gfdl_data_loader, ds_with_time_bounds):
     new = GFDLDataLoader(gfdl_data_loader, data_direc=os.path.join('.', 'a'))
     assert new.data_direc == os.path.join('.', 'a')
@@ -369,7 +378,8 @@ def test_overriding_constructor(gfdl_data_loader, ds):
     assert new.data_end_date == datetime.datetime(2003, 12, 31)

     new = GFDLDataLoader(gfdl_data_loader, preprocess_func=lambda ds: ds)
-    xr.testing.assert_identical(new.preprocess_func(ds), ds)
+    xr.testing.assert_identical(new.preprocess_func(ds_with_time_bounds),
+                                ds_with_time_bounds)

     new = GFDLDataLoader(gfdl_data_loader, upcast_float32=True)
     assert new.upcast_float32
@@ -582,7 +592,7 @@ def test_load_variable(load_variable_data_loader, start_date, end_date):
         condensation_rain, start_date, end_date, intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00050101.precip_monthly.nc')
-    expected = _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = xr.open_dataset(filepath)['condensation_rain']

     np.testing.assert_array_equal(result.values, expected.values)
@@ -715,7 +725,7 @@ def preprocess(ds, **kwargs):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '000{}0101.precip_monthly.nc'.format(year))
-    expected = _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = xr.open_dataset(filepath)['condensation_rain']
     np.testing.assert_allclose(result.values, expected.values)
@@ -733,7 +743,7 @@ def preprocess(ds, **kwargs):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00050101.precip_monthly.nc')
-    expected = 10. * _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = 10. * xr.open_dataset(filepath)['condensation_rain']

     np.testing.assert_allclose(result.values, expected.values)

     result = load_variable_data_loader.load_variable(
@@ -742,7 +752,7 @@ def preprocess(ds, **kwargs):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00040101.precip_monthly.nc')
-    expected = _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = xr.open_dataset(filepath)['condensation_rain']

     np.testing.assert_allclose(result.values, expected.values)
@@ -771,7 +781,7 @@ def test_recursively_compute_variable_native(load_variable_data_loader):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00050101.precip_monthly.nc')
-    expected = _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = xr.open_dataset(filepath)['condensation_rain']

     np.testing.assert_array_equal(result.values, expected.values)
@@ -784,7 +794,7 @@ def test_recursively_compute_variable_one_level(load_variable_data_loader):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00050101.precip_monthly.nc')
-    expected = 2. * _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = 2. * xr.open_dataset(filepath)['condensation_rain']

     np.testing.assert_array_equal(result.values, expected.values)
@@ -800,7 +810,7 @@ def test_recursively_compute_variable_multi_level(load_variable_data_loader):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00050101.precip_monthly.nc')
-    expected = 3. * _open_ds_catch_warnings(filepath)['condensation_rain']
+    expected = 3. * xr.open_dataset(filepath)['condensation_rain']
     np.testing.assert_array_equal(result.values, expected.values)
@@ -811,7 +821,7 @@ def test_recursively_compute_grid_attr(load_variable_data_loader):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00060101.sphum_monthly.nc')
-    expected = _open_ds_catch_warnings(filepath)['bk']
+    expected = xr.open_dataset(filepath)['bk']

     np.testing.assert_array_equal(result.values, expected.values)
@@ -828,7 +838,7 @@ def test_recursively_compute_grid_attr_multi_level(load_variable_data_loader):
         intvl_in='monthly')
     filepath = os.path.join(os.path.split(ROOT_PATH)[0], 'netcdf',
                             '00060101.sphum_monthly.nc')
-    expected = 3 * _open_ds_catch_warnings(filepath)['bk']
+    expected = 3 * xr.open_dataset(filepath)['bk']

     np.testing.assert_array_equal(result.values, expected.values)
diff --git a/aospy/test/test_utils_times.py b/aospy/test/test_utils_times.py
index 3fe4bd4..1ee029d 100755
--- a/aospy/test/test_utils_times.py
+++ b/aospy/test/test_utils_times.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 """Test suite for aospy.timedate module."""
 import datetime
-import warnings

 import cftime
 import numpy as np
@@ -13,9 +12,15 @@
 from aospy.data_loader import set_grid_attrs_as_coords
 from aospy.internal_names import (
-    TIME_STR, TIME_BOUNDS_STR, BOUNDS_STR, TIME_WEIGHTS_STR,
-    RAW_START_DATE_STR, RAW_END_DATE_STR, SUBSET_START_DATE_STR,
-    SUBSET_END_DATE_STR
+    BOUNDS_STR,
+    RAW_START_DATE_STR,
+    RAW_END_DATE_STR,
+    SUBSET_START_DATE_STR,
+    SUBSET_END_DATE_STR,
+    TIME_BOUNDS_STR,
+    TIME_STR,
+    TIME_WEIGHTS_STR,
+
 )
 from aospy.automate import _merge_dicts
 from aospy.utils.times import (
@@ -36,7 +41,8 @@
     sel_time,
     yearly_average,
     infer_year,
-    maybe_convert_to_index_date_type
+    maybe_convert_to_index_date_type,
+    prep_time_data,
 )
@@ -203,27 +209,6 @@ def test_extract_months_single_month():
     xr.testing.assert_identical(actual, desired)


-@pytest.fixture
-def ds_time_encoded_cf():
-    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
-    nv = np.array([0, 1])
-    time = np.array([15, 46, 74])
-    data = np.zeros((3))
-    ds = xr.DataArray(data,
-                      coords=[time],
-                      dims=[TIME_STR],
-                      name='a').to_dataset()
-    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
-                                       coords=[time, nv],
-                                       dims=[TIME_STR, BOUNDS_STR],
-                                       name=TIME_BOUNDS_STR)
-    units_str = 'days since 2000-01-01 00:00:00'
-    cal_str = 'noleap'
-    ds[TIME_STR].attrs['units'] = units_str
-    ds[TIME_STR].attrs['calendar'] = cal_str
-    return ds
-
-
 def test_ensure_time_avg_has_cf_metadata(ds_time_encoded_cf):
     ds = ds_time_encoded_cf
     time = ds[TIME_STR].values
@@ -334,7 +319,6 @@ def test_assert_has_data_for_time():
 }


-@pytest.mark.filterwarnings('ignore:The enable_cftimeindex')
 @pytest.mark.filterwarnings('ignore:Unable to decode')
 @pytest.mark.parametrize(['calendar', 'date_type'],
                          list(_CFTIME_DATE_TYPES.items()))
@@ -358,9 +342,7 @@ def test_assert_has_data_for_time_cftime_datetimes(calendar, date_type):
     ds = ensure_time_avg_has_cf_metadata(ds)
     ds = set_grid_attrs_as_coords(ds)
-    with warnings.catch_warnings(record=True):
-        with xr.set_options(enable_cftimeindex=True):
-            ds = xr.decode_cf(ds)
+    ds = xr.decode_cf(ds)
     da = ds[var_name]

     start_date = date_type(2, 1, 2)
@@ -426,6 +408,11 @@ def test_assert_matching_time_coord():
         assert_matching_time_coord(arr1, arr2)


+def test_ensure_time_as_index_ds_no_times(ds_no_time):
+    with pytest.raises(ValueError):
+        ensure_time_as_index(ds_no_time)
+
+
 def test_ensure_time_as_index_no_change():
     # Already properly indexed, so shouldn't be modified.
     arr = xr.DataArray([-23, 42.4], coords=[[1, 2]], dims=[TIME_STR])
@@ -455,8 +442,7 @@ def test_ensure_time_as_index_with_change():
         [[3.5, 4.5]], dims=[TIME_STR, BOUNDS_STR],
         coords={TIME_STR: arr[TIME_STR]}
     )
-    ds = ds.isel(**{TIME_STR: 0}).expand_dims(TIME_STR)
-    initial_ds = ds.copy()
+    ds = ds.isel(**{TIME_STR: 0})
     actual = ensure_time_as_index(ds)
     expected = arr.to_dataset(name='a')
     expected.coords[TIME_WEIGHTS_STR] = xr.DataArray(
@@ -467,9 +453,6 @@ def test_ensure_time_as_index_with_change():
         coords={TIME_STR: arr[TIME_STR]}
     )
     xr.testing.assert_identical(actual, expected)
-    # Make sure input Dataset was not mutated by the call
-    # to ensure_time_as_index
-    xr.testing.assert_identical(ds, initial_ds)


 def test_sel_time():
@@ -633,3 +616,17 @@ def test_infer_year_invalid(date):
 def test_maybe_convert_to_index_date_type(index, date, expected):
     result = maybe_convert_to_index_date_type(index, date)
     assert result == expected
+
+
+def test_prep_time_data_with_time_bounds(ds_with_time_bounds):
+    assert (TIME_BOUNDS_STR in ds_with_time_bounds)
+    assert (TIME_WEIGHTS_STR not in ds_with_time_bounds)
+    result = prep_time_data(ds_with_time_bounds)
+    assert (TIME_WEIGHTS_STR in result)
+
+
+def test_prep_time_data_no_time_bounds(ds_inst, caplog):
+    assert (TIME_BOUNDS_STR not in ds_inst)
+    prep_time_data(ds_inst)
+    log_record = caplog.record_tuples[0][-1]
+    assert log_record.startswith("dt array not found.")
diff --git a/aospy/utils/times.py b/aospy/utils/times.py
index 702e3ec..e8683a4 100644
--- a/aospy/utils/times.py
+++ b/aospy/utils/times.py
@@ -115,7 +115,9 @@ def monthly_mean_ts(arr):
     monthly_mean_at_each_ind : Copy monthly means to each submonthly time

     """
-    return arr.resample(**{TIME_STR: '1M'}).mean(TIME_STR).dropna(TIME_STR)
+    return arr.resample(
+        **{TIME_STR: '1M'}, restore_coord_dims=True
+    ).mean(TIME_STR).dropna(TIME_STR)


 def monthly_mean_at_each_ind(monthly_means, sub_monthly_timeseries):
@@ -140,7 +142,7 @@ def monthly_mean_at_each_ind(monthly_means, sub_monthly_timeseries):
     time = monthly_means[TIME_STR]
     start = time.indexes[TIME_STR][0].replace(day=1, hour=0)
     end = time.indexes[TIME_STR][-1]
-    new_indices = pd.DatetimeIndex(start=start, end=end, freq='MS')
+    new_indices = pd.date_range(start=start, end=end, freq='MS')
     arr_new = monthly_means.reindex(time=new_indices, method='backfill')
     return arr_new.reindex_like(sub_monthly_timeseries, method='pad')
@@ -171,8 +173,8 @@ def yearly_average(arr, dt):
     yr_str = TIME_STR + '.year'
     # Retain original data's mask.
     dt = dt.where(np.isfinite(arr))
-    return ((arr*dt).groupby(yr_str).sum(TIME_STR) /
-            dt.groupby(yr_str).sum(TIME_STR))
+    return ((arr*dt).groupby(yr_str, restore_coord_dims=True).sum(TIME_STR) /
+            dt.groupby(yr_str, restore_coord_dims=True).sum(TIME_STR))


 def ensure_datetime(obj):
@@ -486,7 +488,7 @@ def assert_matching_time_coord(arr1, arr2):
         raise ValueError(message.format(arr1[TIME_STR], arr2[TIME_STR]))


-def ensure_time_as_index(ds):
+def ensure_time_as_index(ds, time_str=TIME_STR):
     """Ensures that time is an indexed coordinate on relevant quantites.

     Sometimes when the data we load from disk has only one timestep, the
@@ -503,22 +505,34 @@ def ensure_time_as_index(ds):
     ds : Dataset
         Dataset with a time coordinate

+    time_str : str, optional
+        Name of the time dimension.  Default
+        ``aospy.internal_names.TIME_STR``.
+
     Returns
     -------
     Dataset

     """
+    if time_str not in ds.coords:
+        raise ValueError("Provided dataset does not have a time dimension "
+                         "with the provided name '{}'".format(time_str) +
+                         "\nDataset:\n{}".format(ds))
+    is_ds_time_scalar = not ds[time_str].shape
+    if is_ds_time_scalar:
+        ds[time_str] = ds[time_str].expand_dims(time_str)
+
     time_indexed_coords = {TIME_WEIGHTS_STR, TIME_BOUNDS_STR}
-    time_indexed_vars = set(ds.data_vars).union(time_indexed_coords)
-    time_indexed_vars = time_indexed_vars.intersection(ds.variables)
+    time_indexed_vars = set(ds.data_vars).union(
+        time_indexed_coords).intersection(ds.variables)
+
     variables_to_replace = {}
     for name in time_indexed_vars:
-        if TIME_STR not in ds[name].indexes:
-            da = ds[name]
-            if TIME_STR not in da.dims:
-                da = ds[name].expand_dims(TIME_STR)
-            da = da.assign_coords(**{TIME_STR: ds[TIME_STR]})
-            variables_to_replace[name] = da
+        arr = ds[name]
+        if time_str not in arr.indexes:
+            if time_str not in arr.dims:
+                arr = ds[name].expand_dims(time_str)
+            arr = arr.assign_coords(**{time_str: ds[time_str]})
+            variables_to_replace[name] = arr
     return ds.assign(**variables_to_replace)
@@ -612,3 +626,38 @@ def maybe_convert_to_index_date_type(index, date):
         return date_type(date.year, date.month, date.day,
                          date.hour, date.minute, date.second,
                          date.microsecond)
+
+
+def prep_time_data(ds):
+    """Prepare time coordinate information in Dataset for use in aospy.
+
+    1. If the Dataset contains a time bounds coordinate, add attributes
+    representing the true beginning and end dates of the time interval used
+    to construct the Dataset
+    2. If the Dataset contains a time bounds coordinate, overwrite the time
+    coordinate values with the averages of the time bounds at each timestep
+    3. Decode the times into np.datetime64 objects for time indexing
+
+    Parameters
+    ----------
+    ds : Dataset
+        Pre-processed Dataset with time coordinate renamed to
+        internal_names.TIME_STR
+
+    Returns
+    -------
+    Dataset
+        The processed Dataset
+
+    """
+    ds = ensure_time_as_index(ds)
+    if TIME_BOUNDS_STR in ds:
+        ds = ensure_time_avg_has_cf_metadata(ds)
+        ds[TIME_STR] = average_time_bounds(ds)
+    else:
+        logging.warning("dt array not found. Assuming equally spaced "
+                        "values in time, even though this may not be "
+                        "the case")
+        ds = add_uniform_time_weights(ds)
+    return xr.decode_cf(ds, decode_times=True, decode_coords=False,
+                        mask_and_scale=True)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 0b47876..0698b21 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -9,6 +9,28 @@ What's New
 v0.3.2 (unreleased)
 ===================

+Breaking Changes
+----------------
+
+- Minimum xarray version is now v0.13.  Moving forward, we will likely
+  continue bumping this up so as to only support the most recent one
+  or two releases of xarray at any time (:pull:`324`).  By `Spencer Hill
+  <https://github.com/spencerahill>`_.
+
+Enhancements
+------------
+
+- Calls to ``xarray.open_mfdataset`` now use the
+  ``combine='by_coords'`` option available as of xarray v0.12.2
+  instead of the now-deprecated ``auto_combine`` (:pull:`324`).  By
+  `Spencer Hill <https://github.com/spencerahill>`_.
+
+- Remove obsolete Python 2 vs. 3 compatibility checks, along with
+  workarounds for bugs and warnings in xarray versions that are no
+  longer supported (:pull:`324`).  By `Spencer Hill
+  <https://github.com/spencerahill>`_.
+
+
 .. _whats-new.0.3.1:

 v0.3.1 (19 November 2018)
diff --git a/setup.py b/setup.py
index 66cc30c..7be9be3 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@
                        'toolz >= 0.7.2',
                        'dask >= 0.14',
                        'distributed >= 1.17.1',
-                       'xarray >= 0.10.6',
+                       'xarray >= 0.13',
                        'cloudpickle >= 0.2.1',
                        'cftime >= 1.0.0'],
     tests_require=['pytest >= 3.3'],
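For reference, the following is a minimal usage sketch of the newly public
``aospy.utils.times.prep_time_data``. It is not part of the diff above; it
simply reuses the values from the ``ds_time_encoded_cf`` fixture added in
``conftest.py``:

import numpy as np
import xarray as xr

from aospy.internal_names import BOUNDS_STR, TIME_BOUNDS_STR, TIME_STR
from aospy.utils import times

# Construct a Dataset whose time coordinate is CF-encoded and carries time
# bounds, mirroring the ds_time_encoded_cf fixture in conftest.py.
time = np.array([15, 46, 74])
ds = xr.DataArray(np.zeros(3), coords=[time], dims=[TIME_STR],
                  name='a').to_dataset()
ds[TIME_BOUNDS_STR] = xr.DataArray(np.array([[0, 31], [31, 59], [59, 90]]),
                                   coords=[time, np.array([0, 1])],
                                   dims=[TIME_STR, BOUNDS_STR],
                                   name=TIME_BOUNDS_STR)
ds[TIME_STR].attrs['units'] = 'days since 2000-01-01 00:00:00'
ds[TIME_STR].attrs['calendar'] = 'noleap'

# In one call this adds CF metadata and time weights, overwrites the time
# coordinate with the average of the time bounds, and decodes the times.
ds = times.prep_time_data(ds)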