From bd78b7f1f46a0fb0a0b0f2d4f4bdbacba55be93d Mon Sep 17 00:00:00 2001 From: Dan Nowacki Date: Thu, 16 May 2019 08:28:29 -0700 Subject: [PATCH 1/6] Implement load_dataset() and load_dataarray() (#2917) * Partial fix for #2841 to improve formatting. Updates formatting to use .format() instead of % operator. Changed all instances of % to .format() and added test for using tuple as key, which errored using % operator. * Revert "Partial fix for #2841 to improve formatting." This reverts commit f17f3ad1a4a2069cd70385af8ad331f644ec66ba. * Implement load_dataset() and load_dataarray() BUG: Fixes #2887 by adding @shoyer solution for load_dataset and load_dataarray, wrappers around open_dataset and open_dataarray which open, load, and close the file and return the Dataset/DataArray TST: Add tests for sequentially opening and writing to files using new functions DOC: Add to whats-new.rst. Also a tiny change to the open_dataset docstring Update docstrings and check for cache in kwargs Undeprecate load_dataset Add to api.rst, fix whats-new.rst typo, raise error instead of warning --- doc/api.rst | 2 ++ doc/whats-new.rst | 12 ++++++-- xarray/__init__.py | 2 +- xarray/backends/api.py | 57 +++++++++++++++++++++++++++++++++-- xarray/tests/test_backends.py | 19 +++++++++++- xarray/tutorial.py | 15 +++------ 6 files changed, 90 insertions(+), 17 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 00b33959eed..0e766f2cf9a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -460,6 +460,7 @@ Dataset methods :toctree: generated/ open_dataset + load_dataset open_mfdataset open_rasterio open_zarr @@ -487,6 +488,7 @@ DataArray methods :toctree: generated/ open_dataarray + load_dataarray DataArray.to_dataset DataArray.to_netcdf DataArray.to_pandas diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac1b5269bfa..d904a3814f1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,12 @@ Enhancements By `James McCreight `_. - Clean up Python 2 compatibility in code (:issue:`2950`) By `Guido Imperiale `_. +- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to + ``open_dataset()`` and ``open_dataarray()`` to open, load into memory, + and close files, returning the Dataset or DataArray. These functions are + helpful for avoiding file-lock errors when trying to write to files opened + using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`) + By `Dan Nowacki `_. Bug fixes ~~~~~~~~~ @@ -153,9 +159,9 @@ Other enhancements By `Keisuke Fujii `_. - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`). By `Kevin Squire `_. -- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` - parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for - backwards compatibility. The ``overwrite_encoded_chunks`` parameter is +- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` + parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for + backwards compatibility. The ``overwrite_encoded_chunks`` parameter is added to remove the original zarr chunk encoding. By `Lily Wang `_. 
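A minimal sketch of the pattern this changelog entry describes (the file name example.nc is purely illustrative): load_dataset() pulls everything into memory and closes the file, so writing back to the same path no longer runs into an open file handle.

>>> import numpy as np
>>> import xarray as xr
>>> xr.Dataset({'foo': ('x', np.random.randn(10))}).to_netcdf('example.nc')
>>> ds = xr.load_dataset('example.nc')  # data now lives in memory, file handle is closed
>>> ds.to_netcdf('example.nc')          # safe; with open_dataset() this write could hit a file lock
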
diff --git a/xarray/__init__.py b/xarray/__init__.py index 773dfe19d01..506cb46de26 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.options import set_options from .backends.api import (open_dataset, open_dataarray, open_mfdataset, - save_mfdataset) + save_mfdataset, load_dataset, load_dataarray) from .backends.rasterio_ import open_rasterio from .backends.zarr import open_zarr diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7c5040580fe..01188e92752 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -185,12 +185,64 @@ def _finalize_store(write, store): store.close() +def load_dataset(filename_or_obj, **kwargs): + """Open, load into memory, and close a Dataset from a file or file-like + object. + + This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs + from `open_dataset` in that it loads the Dataset into memory, closes the + file, and returns the Dataset. In contrast, `open_dataset` keeps the file + handle open and lazy loads its contents. All parameters are passed directly + to `open_dataset`. See that documentation for further details. + + Returns + ------- + dataset : Dataset + The newly created Dataset. + + See Also + -------- + open_dataset + """ + if 'cache' in kwargs: + raise TypeError('cache has no effect in this context') + + with open_dataset(filename_or_obj, **kwargs) as ds: + return ds.load() + + +def load_dataarray(filename_or_obj, **kwargs): + """Open, load into memory, and close a DataArray from a file or file-like + object containing a single data variable. + + This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs + from `open_dataarray` in that it loads the Dataset into memory, closes the + file, and returns the Dataset. In contrast, `open_dataarray` keeps the file + handle open and lazy loads its contents. All parameters are passed directly + to `open_dataarray`. See that documentation for further details. + + Returns + ------- + datarray : DataArray + The newly created DataArray. + + See Also + -------- + open_dataarray + """ + if 'cache' in kwargs: + raise TypeError('cache has no effect in this context') + + with open_dataarray(filename_or_obj, **kwargs) as da: + return da.load() + + def open_dataset(filename_or_obj, group=None, decode_cf=True, mask_and_scale=None, decode_times=True, autoclose=None, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, backend_kwargs=None, use_cftime=None): - """Load and decode a dataset from a file or file-like object. + """Open and decode a dataset from a file or file-like object. Parameters ---------- @@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, backend_kwargs=None, use_cftime=None): - """Open an DataArray from a netCDF file containing a single data variable. + """Open an DataArray from a file or file-like object containing a single + data variable. This is designed to read netCDF files with only one data variable. If multiple variables are present then a ValueError is raised. 
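The single-variable counterpart behaves the same way, and both helpers reject the cache keyword up front because the data is always loaded eagerly. A short sketch (file name illustrative, a file with a single data variable assumed):

>>> import numpy as np
>>> import xarray as xr
>>> xr.DataArray(np.arange(5), dims='x', name='foo').to_netcdf('single_var.nc')
>>> da = xr.load_dataarray('single_var.nc')  # DataArray in memory, file closed
>>> da.to_netcdf('single_var.nc')            # overwriting the same path is fine
>>> xr.load_dataset('single_var.nc', cache=False)
Traceback (most recent call last):
    ...
TypeError: cache has no effect in this context
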
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a4c0374e158..f31d3bf4f9b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -19,7 +19,7 @@ import xarray as xr from xarray import ( DataArray, Dataset, backends, open_dataarray, open_dataset, open_mfdataset, - save_mfdataset) + save_mfdataset, load_dataset, load_dataarray) from xarray.backends.common import robust_getitem from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding from xarray.backends.pydap_ import PydapDataStore @@ -2641,6 +2641,23 @@ def test_save_mfdataset_compute_false_roundtrip(self): with open_mfdataset([tmp1, tmp2]) as actual: assert_identical(actual, original) + def test_load_dataset(self): + with create_tmp_file() as tmp: + original = Dataset({'foo': ('x', np.random.randn(10))}) + original.to_netcdf(tmp) + ds = load_dataset(tmp) + # this would fail if we used open_dataset instead of load_dataset + ds.to_netcdf(tmp) + + def test_load_dataarray(self): + with create_tmp_file() as tmp: + original = Dataset({'foo': ('x', np.random.randn(10))}) + original.to_netcdf(tmp) + ds = load_dataarray(tmp) + # this would fail if we used open_dataarray instead of + # load_dataarray + ds.to_netcdf(tmp) + @requires_scipy_or_netCDF4 @requires_pydap diff --git a/xarray/tutorial.py b/xarray/tutorial.py index f54cf7b3889..1a977450ed6 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -27,7 +27,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, github_url='https://github.com/pydata/xarray-data', branch='master', **kws): """ - Load a dataset from the online repository (requires internet). + Open a dataset from the online repository (requires internet). If a local copy is found then always use that to avoid network traffic. @@ -91,17 +91,12 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, def load_dataset(*args, **kwargs): """ - `load_dataset` will be removed a future version of xarray. The current - behavior of this function can be achived by using - `tutorial.open_dataset(...).load()`. + Open, load into memory, and close a dataset from the online repository + (requires internet). See Also -------- open_dataset """ - warnings.warn( - "load_dataset` will be removed in a future version of xarray. 
The " - "current behavior of this function can be achived by using " - "`tutorial.open_dataset(...).load()`.", - DeprecationWarning, stacklevel=2) - return open_dataset(*args, **kwargs).load() + with open_dataset(*args, **kwargs) as ds: + return ds.load() From 66581084a89f75476b581ef74e5226eae2d62a84 Mon Sep 17 00:00:00 2001 From: Kevin Squire Date: Thu, 16 May 2019 08:55:15 -0700 Subject: [PATCH 2/6] Fix rolling.constuct() example (#2967) * The example was using the wrong name for the function (to_datarray), and used the wrong dimension for the window --- xarray/core/rolling.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index ad9b17fef92..c113cfebe2a 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -170,15 +170,15 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): -------- >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) >>> - >>> rolling = da.rolling(a=3) - >>> rolling.to_datarray('window_dim') + >>> rolling = da.rolling(b=3) + >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim >>> - >>> rolling = da.rolling(a=3, center=True) - >>> rolling.to_datarray('window_dim') + >>> rolling = da.rolling(b=3, center=True) + >>> rolling.construct('window_dim') array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]], [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]]) From 0811141e8f985a1f3b95ead92c3850cc74e160a5 Mon Sep 17 00:00:00 2001 From: Peter Hausamann Date: Tue, 21 May 2019 19:37:54 +0200 Subject: [PATCH 3/6] Add transpose_coords option to DataArray.transpose (#2556) * Add transpose_coords option to DataArray.transpose Fixes #1856 * Fix typo * Fix bug in transpose Fix python 2 compatibility * Set default for transpose_coords to None Update documentation * Fix bug in coordinate tranpose Update documentation * Suppress FutureWarning in tests * Add restore_coord_dims parameter to DataArrayGroupBy.apply * Move restore_coord_dims parameter to GroupBy class * Remove restore_coord_dims parameter from DataArrayResample.apply * Update whats-new * Update whats-new --- doc/whats-new.rst | 7 +++++ xarray/core/common.py | 27 ++++++++++++---- xarray/core/dataarray.py | 26 ++++++++++++++-- xarray/core/groupby.py | 25 ++++++++++++--- xarray/plot/plot.py | 14 ++++++--- xarray/tests/test_dataarray.py | 57 ++++++++++++++++++++++++++++------ xarray/tests/test_dataset.py | 12 +++++-- xarray/tests/test_interp.py | 6 ++-- xarray/tests/test_plot.py | 3 +- 9 files changed, 144 insertions(+), 33 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d904a3814f1..ab7c155950c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,13 @@ Enhancements - Character arrays' character dimension name decoding and encoding handled by ``var.encoding['char_dim_name']`` (:issue:`2895`) By `James McCreight `_. +- :py:meth:`DataArray.transpose` now accepts a keyword argument + ``transpose_coords`` which enables transposition of coordinates in the + same way as :py:meth:`Dataset.transpose`. :py:meth:`DataArray.groupby` + :py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now + accept a keyword argument ``restore_coord_dims`` which keeps the order + of the dimensions of multi-dimensional coordinates intact (:issue:`1856`). + By `Peter Hausamann `_. 
- Clean up Python 2 compatibility in code (:issue:`2950`) By `Guido Imperiale `_. - Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to diff --git a/xarray/core/common.py b/xarray/core/common.py index b518e8431fd..00d0383a727 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -441,7 +441,8 @@ def pipe(self, func: Union[Callable[..., T], Tuple[Callable[..., T], str]], else: return func(self, *args, **kwargs) - def groupby(self, group, squeeze: bool = True): + def groupby(self, group, squeeze: bool = True, + restore_coord_dims: Optional[bool] = None): """Returns a GroupBy object for performing grouped operations. Parameters @@ -453,6 +454,9 @@ def groupby(self, group, squeeze: bool = True): If "group" is a dimension of any arrays in this dataset, `squeeze` controls whether the subarrays have a dimension of length 1 along that dimension or if the dimension is squeezed out. + restore_coord_dims : bool, optional + If True, also restore the dimension order of multi-dimensional + coordinates. Returns ------- @@ -485,11 +489,13 @@ def groupby(self, group, squeeze: bool = True): core.groupby.DataArrayGroupBy core.groupby.DatasetGroupBy """ # noqa - return self._groupby_cls(self, group, squeeze=squeeze) + return self._groupby_cls(self, group, squeeze=squeeze, + restore_coord_dims=restore_coord_dims) def groupby_bins(self, group, bins, right: bool = True, labels=None, precision: int = 3, include_lowest: bool = False, - squeeze: bool = True): + squeeze: bool = True, + restore_coord_dims: Optional[bool] = None): """Returns a GroupBy object for performing grouped operations. Rather than using all unique values of `group`, the values are discretized @@ -522,6 +528,9 @@ def groupby_bins(self, group, bins, right: bool = True, labels=None, If "group" is a dimension of any arrays in this dataset, `squeeze` controls whether the subarrays have a dimension of length 1 along that dimension or if the dimension is squeezed out. + restore_coord_dims : bool, optional + If True, also restore the dimension order of multi-dimensional + coordinates. Returns ------- @@ -536,9 +545,11 @@ def groupby_bins(self, group, bins, right: bool = True, labels=None, .. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html """ # noqa return self._groupby_cls(self, group, squeeze=squeeze, bins=bins, + restore_coord_dims=restore_coord_dims, cut_kwargs={'right': right, 'labels': labels, 'precision': precision, - 'include_lowest': include_lowest}) + 'include_lowest': + include_lowest}) def rolling(self, dim: Optional[Mapping[Hashable, int]] = None, min_periods: Optional[int] = None, center: bool = False, @@ -669,7 +680,7 @@ def resample(self, indexer: Optional[Mapping[Hashable, str]] = None, skipna=None, closed: Optional[str] = None, label: Optional[str] = None, base: int = 0, keep_attrs: Optional[bool] = None, - loffset=None, + loffset=None, restore_coord_dims: Optional[bool] = None, **indexer_kwargs: str): """Returns a Resample object for performing resampling operations. @@ -697,6 +708,9 @@ def resample(self, indexer: Optional[Mapping[Hashable, str]] = None, If True, the object's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + restore_coord_dims : bool, optional + If True, also restore the dimension order of multi-dimensional + coordinates. **indexer_kwargs : {dim: freq} The keyword arguments form of ``indexer``. One of indexer or indexer_kwargs must be provided. 
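A brief sketch of what the new keywords control, using a DataArray with a two-dimensional coordinate (the names are made up for illustration). transpose_coords=True transposes the coordinate along with the data, and restore_coord_dims=True keeps the coordinate's original dimension order through a groupby/apply round trip; leaving them unset on objects with multi-dimensional coordinates triggers a FutureWarning, as implemented in the dataarray.py and groupby.py changes below.

>>> import numpy as np
>>> import xarray as xr
>>> da = xr.DataArray(np.random.randn(5, 3), dims=('x', 'y'),
...                   coords={'a': ('x', range(5)),
...                           'c': (('x', 'y'), np.random.randn(5, 3))})
>>> da.transpose('y', 'x', transpose_coords=True)['c'].dims
('y', 'x')
>>> da.groupby('a', restore_coord_dims=True).apply(lambda g: g.squeeze())['c'].dims
('a', 'y')
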
@@ -786,7 +800,8 @@ def resample(self, indexer: Optional[Mapping[Hashable, str]] = None, dims=dim_coord.dims, name=RESAMPLE_DIM) resampler = self._resample_cls(self, group=group, dim=dim_name, grouper=grouper, - resample_dim=RESAMPLE_DIM) + resample_dim=RESAMPLE_DIM, + restore_coord_dims=restore_coord_dims) return resampler diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8d3836f5d8c..15abdaf4a92 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1405,7 +1405,7 @@ def unstack(self, dim=None): ds = self._to_temp_dataset().unstack(dim) return self._from_temp_dataset(ds) - def transpose(self, *dims) -> 'DataArray': + def transpose(self, *dims, transpose_coords=None) -> 'DataArray': """Return a new DataArray object with transposed dimensions. Parameters @@ -1413,6 +1413,8 @@ def transpose(self, *dims) -> 'DataArray': *dims : str, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. + transpose_coords : boolean, optional + If True, also transpose the coordinates of this DataArray. Returns ------- @@ -1430,8 +1432,28 @@ def transpose(self, *dims) -> 'DataArray': numpy.transpose Dataset.transpose """ + if dims: + if set(dims) ^ set(self.dims): + raise ValueError('arguments to transpose (%s) must be ' + 'permuted array dimensions (%s)' + % (dims, tuple(self.dims))) + variable = self.variable.transpose(*dims) - return self._replace(variable) + if transpose_coords: + coords = {} + for name, coord in self.coords.items(): + coord_dims = tuple(dim for dim in dims if dim in coord.dims) + coords[name] = coord.variable.transpose(*coord_dims) + return self._replace(variable, coords) + else: + if transpose_coords is None \ + and any(self[c].ndim > 1 for c in self.coords): + warnings.warn('This DataArray contains multi-dimensional ' + 'coordinates. In the future, these coordinates ' + 'will be transposed as well unless you specify ' + 'transpose_coords=False.', + FutureWarning, stacklevel=2) + return self._replace(variable) @property def T(self) -> 'DataArray': diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 82a92044caf..d7dcb5b0426 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -197,7 +197,7 @@ class GroupBy(SupportsArithmetic): """ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, - cut_kwargs={}): + restore_coord_dims=None, cut_kwargs={}): """Create a GroupBy object Parameters @@ -215,6 +215,9 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, bins : array-like, optional If `bins` is specified, the groups will be discretized into the specified bins by `pandas.cut`. + restore_coord_dims : bool, optional + If True, also restore the dimension order of multi-dimensional + coordinates. cut_kwargs : dict, optional Extra keyword arguments to pass to `pandas.cut` @@ -279,6 +282,16 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, safe_cast_to_index(group), sort=(bins is None)) unique_coord = IndexVariable(group.name, unique_values) + if isinstance(obj, DataArray) \ + and restore_coord_dims is None \ + and any(obj[c].ndim > 1 for c in obj.coords): + warnings.warn('This DataArray contains multi-dimensional ' + 'coordinates. 
In the future, the dimension order ' + 'of these coordinates will be restored as well ' + 'unless you specify restore_coord_dims=False.', + FutureWarning, stacklevel=2) + restore_coord_dims = False + # specification for the groupby operation self._obj = obj self._group = group @@ -288,6 +301,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, self._stacked_dim = stacked_dim self._inserted_dims = inserted_dims self._full_index = full_index + self._restore_coord_dims = restore_coord_dims # cached attributes self._groups = None @@ -508,7 +522,8 @@ def lookup_order(dimension): return axis new_order = sorted(stacked.dims, key=lookup_order) - return stacked.transpose(*new_order) + return stacked.transpose( + *new_order, transpose_coords=self._restore_coord_dims) def apply(self, func, shortcut=False, args=(), **kwargs): """Apply a function over each array in the group and concatenate them @@ -558,7 +573,7 @@ def apply(self, func, shortcut=False, args=(), **kwargs): for arr in grouped) return self._combine(applied, shortcut=shortcut) - def _combine(self, applied, shortcut=False): + def _combine(self, applied, restore_coord_dims=False, shortcut=False): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) coord, dim, positions = self._infer_concat_args(applied_example) @@ -580,8 +595,8 @@ def _combine(self, applied, shortcut=False): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, axis=None, - keep_attrs=None, shortcut=True, **kwargs): + def reduce(self, func, dim=None, axis=None, keep_attrs=None, + shortcut=True, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 316d4fb4dd9..d4cb1a7726b 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -64,8 +64,10 @@ def _infer_line_data(darray, x, y, hue): if huename in darray.dims: otherindex = 1 if darray.dims.index(huename) == 0 else 0 otherdim = darray.dims[otherindex] - yplt = darray.transpose(otherdim, huename) - xplt = xplt.transpose(otherdim, huename) + yplt = darray.transpose( + otherdim, huename, transpose_coords=False) + xplt = xplt.transpose( + otherdim, huename, transpose_coords=False) else: raise ValueError('For 2D inputs, hue must be a dimension' + ' i.e. one of ' + repr(darray.dims)) @@ -79,7 +81,9 @@ def _infer_line_data(darray, x, y, hue): if yplt.ndim > 1: if huename in darray.dims: otherindex = 1 if darray.dims.index(huename) == 0 else 0 - xplt = darray.transpose(otherdim, huename) + otherdim = darray.dims[otherindex] + xplt = darray.transpose( + otherdim, huename, transpose_coords=False) else: raise ValueError('For 2D inputs, hue must be a dimension' + ' i.e. 
one of ' + repr(darray.dims)) @@ -614,9 +618,9 @@ def newplotfunc(darray, x=None, y=None, figsize=None, size=None, yx_dims = (ylab, xlab) dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims) if dims != darray.dims: - darray = darray.transpose(*dims) + darray = darray.transpose(*dims, transpose_coords=True) elif darray[xlab].dims[-1] == darray.dims[0]: - darray = darray.transpose() + darray = darray.transpose(transpose_coords=True) # Pass the data as a masked ndarray too zval = darray.to_masked_array(copy=False) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 9471ec144c0..43af27d0696 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1681,14 +1681,14 @@ def test_math_with_coords(self): assert_identical(expected, actual) actual = orig[0, :] + orig[:, 0] - assert_identical(expected.T, actual) + assert_identical(expected.transpose(transpose_coords=True), actual) - actual = orig - orig.T + actual = orig - orig.transpose(transpose_coords=True) expected = DataArray(np.zeros((2, 3)), orig.coords) assert_identical(expected, actual) - actual = orig.T - orig - assert_identical(expected.T, actual) + actual = orig.transpose(transpose_coords=True) - orig + assert_identical(expected.transpose(transpose_coords=True), actual) alt = DataArray([1, 1], {'x': [-1, -2], 'c': 'foo', 'd': 555}, 'x') actual = orig + alt @@ -1801,8 +1801,27 @@ def test_stack_nonunique_consistency(self): assert_identical(expected, actual) def test_transpose(self): - assert_equal(self.dv.variable.transpose(), - self.dv.transpose().variable) + da = DataArray(np.random.randn(3, 4, 5), dims=('x', 'y', 'z'), + coords={'x': range(3), 'y': range(4), 'z': range(5), + 'xy': (('x', 'y'), np.random.randn(3, 4))}) + + actual = da.transpose(transpose_coords=False) + expected = DataArray(da.values.T, dims=('z', 'y', 'x'), + coords=da.coords) + assert_equal(expected, actual) + + actual = da.transpose('z', 'y', 'x', transpose_coords=True) + expected = DataArray(da.values.T, dims=('z', 'y', 'x'), + coords={'x': da.x.values, 'y': da.y.values, + 'z': da.z.values, + 'xy': (('y', 'x'), da.xy.values.T)}) + assert_equal(expected, actual) + + with pytest.raises(ValueError): + da.transpose('x', 'y') + + with pytest.warns(FutureWarning): + da.transpose() def test_squeeze(self): assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable) @@ -2258,6 +2277,23 @@ def test_groupby_restore_dim_order(self): result = array.groupby(by).apply(lambda x: x.squeeze()) assert result.dims == expected_dims + def test_groupby_restore_coord_dims(self): + array = DataArray(np.random.randn(5, 3), + coords={'a': ('x', range(5)), 'b': ('y', range(3)), + 'c': (('x', 'y'), np.random.randn(5, 3))}, + dims=['x', 'y']) + + for by, expected_dims in [('x', ('x', 'y')), + ('y', ('x', 'y')), + ('a', ('a', 'y')), + ('b', ('x', 'b'))]: + result = array.groupby(by, restore_coord_dims=True).apply( + lambda x: x.squeeze())['c'] + assert result.dims == expected_dims + + with pytest.warns(FutureWarning): + array.groupby('x').apply(lambda x: x.squeeze()) + def test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims='x') by = DataArray(['a'] * 2 + ['b'] * 3, dims='x', name='ab') @@ -2445,15 +2481,18 @@ def test_resample_drop_nondim_coords(self): array = ds['data'] # Re-sample - actual = array.resample(time="12H").mean('time') + actual = array.resample( + time="12H", restore_coord_dims=True).mean('time') assert 'tc' not in actual.coords # Up-sample - filling - actual = 
array.resample(time="1H").ffill() + actual = array.resample( + time="1H", restore_coord_dims=True).ffill() assert 'tc' not in actual.coords # Up-sample - interpolation - actual = array.resample(time="1H").interpolate('linear') + actual = array.resample( + time="1H", restore_coord_dims=True).interpolate('linear') assert 'tc' not in actual.coords def test_resample_keep_attrs(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b47e26328ad..ecacf43caf4 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4062,14 +4062,20 @@ def test_dataset_math_errors(self): def test_dataset_transpose(self): ds = Dataset({'a': (('x', 'y'), np.random.randn(3, 4)), - 'b': (('y', 'x'), np.random.randn(4, 3))}) + 'b': (('y', 'x'), np.random.randn(4, 3))}, + coords={'x': range(3), 'y': range(4), + 'xy': (('x', 'y'), np.random.randn(3, 4))}) actual = ds.transpose() - expected = ds.apply(lambda x: x.transpose()) + expected = Dataset({'a': (('y', 'x'), ds.a.values.T), + 'b': (('x', 'y'), ds.b.values.T)}, + coords={'x': ds.x.values, 'y': ds.y.values, + 'xy': (('y', 'x'), ds.xy.values.T)}) assert_identical(expected, actual) actual = ds.transpose('x', 'y') - expected = ds.apply(lambda x: x.transpose('x', 'y')) + expected = ds.apply( + lambda x: x.transpose('x', 'y', transpose_coords=True)) assert_identical(expected, actual) ds = create_test_data() diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 8347d54bd1e..a11e4b9e79a 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -143,7 +143,8 @@ def func(obj, dim, new_x): 'y': da['y'], 'x': ('z', xdest.values), 'x2': ('z', func(da['x2'], 'x', xdest))}) - assert_allclose(actual, expected.transpose('z', 'y')) + assert_allclose(actual, + expected.transpose('z', 'y', transpose_coords=True)) # xdest is 2d xdest = xr.DataArray(np.linspace(0.1, 0.9, 30).reshape(6, 5), @@ -160,7 +161,8 @@ def func(obj, dim, new_x): coords={'z': xdest['z'], 'w': xdest['w'], 'z2': xdest['z2'], 'y': da['y'], 'x': (('z', 'w'), xdest), 'x2': (('z', 'w'), func(da['x2'], 'x', xdest))}) - assert_allclose(actual, expected.transpose('z', 'w', 'y')) + assert_allclose(actual, + expected.transpose('z', 'w', 'y', transpose_coords=True)) @pytest.mark.parametrize('case', [3, 4]) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 759a2974ca6..84510da65fe 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1202,7 +1202,8 @@ def test_cmap_and_color_both(self): def test_2d_coord_with_interval(self): for dim in self.darray.dims: - gp = self.darray.groupby_bins(dim, range(15)).mean(dim) + gp = self.darray.groupby_bins( + dim, range(15), restore_coord_dims=True).mean(dim) for kind in ['imshow', 'pcolormesh', 'contourf', 'contour']: getattr(gp.plot, kind)() From 7edf2e20d4c898fbb637da3b3e6ded15808e040b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 24 May 2019 22:01:31 -0400 Subject: [PATCH 4/6] Remove deprecated pytest.config usages (#2988) * initial attempt at moving away from deprecated pytest.config * whatsnew --- conftest.py | 20 ++++++++++++++++++++ doc/whats-new.rst | 2 ++ xarray/tests/__init__.py | 19 ++----------------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/conftest.py b/conftest.py index d7f4e0c89bc..ffceb27e753 100644 --- a/conftest.py +++ b/conftest.py @@ -1,5 +1,7 @@ """Configuration for pytest.""" +import pytest + def pytest_addoption(parser): """Add command-line flags for 
pytest.""" @@ -7,3 +9,21 @@ def pytest_addoption(parser): help="runs flaky tests") parser.addoption("--run-network-tests", action="store_true", help="runs tests requiring a network connection") + + +def pytest_collection_modifyitems(config, items): + + if not config.getoption("--run-flaky"): + skip_flaky = pytest.mark.skip( + reason="set --run-flaky option to run flaky tests") + for item in items: + if "flaky" in item.keywords: + item.add_marker(skip_flaky) + + if not config.getoption("--run-network-tests"): + skip_network = pytest.mark.skip( + reason="set --run-network-tests option to run tests requiring an" + "internet connection") + for item in items: + if "network" in item.keywords: + item.add_marker(skip_network) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ab7c155950c..dfdca55d218 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,8 @@ Bug fixes By `Deepak Cherian `_. +- Removed usages of `pytest.config`, which is deprecated (:issue:`2988`:) + By `Maximilian Roos `_. .. _whats-new.0.12.1: diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index e9d670e4dd9..5e559fce526 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -108,23 +108,8 @@ def LooseVersion(vstring): else: dask.config.set(scheduler='single-threaded') -# pytest config -try: - _SKIP_FLAKY = not pytest.config.getoption("--run-flaky") - _SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests") -except (ValueError, AttributeError): - # Can't get config from pytest, e.g., because xarray is installed instead - # of being run from a development version (and hence conftests.py is not - # available). Don't run flaky tests. - _SKIP_FLAKY = True - _SKIP_NETWORK_TESTS = True - -flaky = pytest.mark.skipif( - _SKIP_FLAKY, reason="set --run-flaky option to run flaky tests") -network = pytest.mark.skipif( - _SKIP_NETWORK_TESTS, - reason="set --run-network-tests option to run tests requiring an " - "internet connection") +flaky = pytest.mark.flaky +network = pytest.mark.network @contextmanager From 6dc8b60849fab48f24494859c15a42f078025be6 Mon Sep 17 00:00:00 2001 From: Zach Griffith Date: Sun, 26 May 2019 19:20:54 -0500 Subject: [PATCH 5/6] Add fill_value for concat and auto_combine (#2964) * add fill_value option for concat and auto_combine * add tests for fill_value in concat and auto_combine * remove errant whitespace * add fill_value description to doc-string * add missing assert --- xarray/core/combine.py | 55 +++++++++++++++++++++++------------- xarray/tests/test_combine.py | 42 +++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 20 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1abd14cd20b..6d922064f6f 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -4,7 +4,7 @@ import pandas as pd -from . import utils +from . import utils, dtypes from .alignment import align from .merge import merge from .variable import IndexVariable, Variable, as_variable @@ -14,7 +14,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', compat='equals', positions=None, indexers=None, mode=None, - concat_over=None): + concat_over=None, fill_value=dtypes.NA): """Concatenate xarray objects along a new or existing dimension. Parameters @@ -66,6 +66,8 @@ def concat(objs, dim=None, data_vars='all', coords='different', List of integer arrays which specifies the integer positions to which to assign each dataset along the concatenated dimension. If not supplied, objects are concatenated in the provided order. 
+ fill_value : scalar, optional + Value to use for newly missing values indexers, mode, concat_over : deprecated Returns @@ -117,7 +119,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', else: raise TypeError('can only concatenate xarray Dataset and DataArray ' 'objects, got %s' % type(first_obj)) - return f(objs, dim, data_vars, coords, compat, positions) + return f(objs, dim, data_vars, coords, compat, positions, fill_value) def _calc_concat_dim_coord(dim): @@ -212,7 +214,8 @@ def process_subset_opt(opt, subset): return concat_over, equals -def _dataset_concat(datasets, dim, data_vars, coords, compat, positions): +def _dataset_concat(datasets, dim, data_vars, coords, compat, positions, + fill_value=dtypes.NA): """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -225,7 +228,8 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions): dim, coord = _calc_concat_dim_coord(dim) # Make sure we're working on a copy (we'll be loading variables) datasets = [ds.copy() for ds in datasets] - datasets = align(*datasets, join='outer', copy=False, exclude=[dim]) + datasets = align(*datasets, join='outer', copy=False, exclude=[dim], + fill_value=fill_value) concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords) @@ -317,7 +321,7 @@ def ensure_common_dims(vars): def _dataarray_concat(arrays, dim, data_vars, coords, compat, - positions): + positions, fill_value=dtypes.NA): arrays = list(arrays) if data_vars != 'all': @@ -336,14 +340,15 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat, datasets.append(arr._to_temp_dataset()) ds = _dataset_concat(datasets, dim, data_vars, coords, compat, - positions) + positions, fill_value) result = arrays[0]._from_temp_dataset(ds, name) result.name = result_name(arrays) return result -def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): +def _auto_concat(datasets, dim=None, data_vars='all', coords='different', + fill_value=dtypes.NA): if len(datasets) == 1 and dim is None: # There is nothing more to combine, so kick out early. return datasets[0] @@ -366,7 +371,8 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'): 'supply the ``concat_dim`` argument ' 'explicitly') dim, = concat_dims - return concat(datasets, dim=dim, data_vars=data_vars, coords=coords) + return concat(datasets, dim=dim, data_vars=data_vars, + coords=coords, fill_value=fill_value) _CONCAT_DIM_DEFAULT = utils.ReprObject('') @@ -442,7 +448,8 @@ def _check_shape_tile_ids(combined_tile_ids): def _combine_nd(combined_ids, concat_dims, data_vars='all', - coords='different', compat='no_conflicts'): + coords='different', compat='no_conflicts', + fill_value=dtypes.NA): """ Concatenates and merges an N-dimensional structure of datasets. 
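As a quick illustration of the new keyword (mirroring the tests added below), coordinate positions missing from one of the inputs are padded with fill_value instead of NaN:

>>> import xarray as xr
>>> ds1 = xr.Dataset({'a': ('x', [2, 3])}, coords={'x': [1, 2]})
>>> ds2 = xr.Dataset({'a': ('x', [1, 2])}, coords={'x': [0, 1]})
>>> combined = xr.concat([ds1, ds2], dim='t', fill_value=0)
>>> # 'x' becomes [0, 1, 2]; the positions missing from each input hold 0 rather than NaN
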
@@ -472,13 +479,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', dim=concat_dim, data_vars=data_vars, coords=coords, - compat=compat) + compat=compat, + fill_value=fill_value) combined_ds = list(combined_ids.values())[0] return combined_ds def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, - coords, compat): + coords, compat, fill_value=dtypes.NA): # Group into lines of datasets which must be combined along dim # need to sort by _new_tile_id first for groupby to work # TODO remove all these sorted OrderedDicts once python >= 3.6 only @@ -490,7 +498,8 @@ def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, combined_ids = OrderedDict(sorted(group)) datasets = combined_ids.values() new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat, - data_vars, coords) + data_vars, coords, + fill_value) return new_combined_ids @@ -500,18 +509,20 @@ def vars_as_keys(ds): def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT, compat='no_conflicts', - data_vars='all', coords='different'): + data_vars='all', coords='different', + fill_value=dtypes.NA): # This is just the old auto_combine function (which only worked along 1D) if concat_dim is not None: dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim sorted_datasets = sorted(datasets, key=vars_as_keys) grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) concatenated = [_auto_concat(list(ds_group), dim=dim, - data_vars=data_vars, coords=coords) + data_vars=data_vars, coords=coords, + fill_value=fill_value) for id, ds_group in grouped_by_vars] else: concatenated = datasets - merged = merge(concatenated, compat=compat) + merged = merge(concatenated, compat=compat, fill_value=fill_value) return merged @@ -521,7 +532,7 @@ def _new_tile_id(single_id_ds_pair): def _auto_combine(datasets, concat_dims, compat, data_vars, coords, - infer_order_from_coords, ids): + infer_order_from_coords, ids, fill_value=dtypes.NA): """ Calls logic to decide concatenation order before concatenating. """ @@ -550,12 +561,14 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords, # Repeatedly concatenate then merge along each dimension combined = _combine_nd(combined_ids, concat_dims, compat=compat, - data_vars=data_vars, coords=coords) + data_vars=data_vars, coords=coords, + fill_value=fill_value) return combined def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT, - compat='no_conflicts', data_vars='all', coords='different'): + compat='no_conflicts', data_vars='all', coords='different', + fill_value=dtypes.NA): """Attempt to auto-magically combine the given datasets into one. This method attempts to combine a list of datasets into a single entity by inspecting metadata and using a combination of concat and merge. 
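The same keyword is threaded through auto_combine. A minimal sketch (two datasets with partially overlapping 'x' coordinates):

>>> import xarray as xr
>>> ds1 = xr.Dataset({'a': ('x', [2, 3])}, coords={'x': [1, 2]})
>>> ds2 = xr.Dataset({'a': ('x', [1, 2])}, coords={'x': [0, 1]})
>>> xr.auto_combine([ds1, ds2], concat_dim='t', fill_value=0)  # gaps padded with 0 instead of NaN
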
@@ -596,6 +609,8 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT, Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of conca + fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -622,4 +637,4 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT, return _auto_combine(datasets, concat_dims=concat_dims, compat=compat, data_vars=data_vars, coords=coords, infer_order_from_coords=infer_order_from_coords, - ids=False) + ids=False, fill_value=fill_value) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 1d8ed169d29..a477df0b0d4 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -7,6 +7,7 @@ import pytest from xarray import DataArray, Dataset, Variable, auto_combine, concat +from xarray.core import dtypes from xarray.core.combine import ( _auto_combine, _auto_combine_1d, _auto_combine_all_along_first_dim, _check_shape_tile_ids, _combine_nd, _infer_concat_order_from_positions, @@ -237,6 +238,20 @@ def test_concat_multiindex(self): assert expected.equals(actual) assert isinstance(actual.x.to_index(), pd.MultiIndex) + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_concat_fill_value(self, fill_value): + datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}), + Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})] + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + expected = Dataset({'a': (('t', 'x'), + [[fill_value, 2, 3], [1, 2, fill_value]])}, + {'x': [0, 1, 2]}) + actual = concat(datasets, dim='t', fill_value=fill_value) + assert_identical(actual, expected) + class TestConcatDataArray: def test_concat(self): @@ -306,6 +321,19 @@ def test_concat_lazy(self): assert combined.shape == (2, 3, 3) assert combined.dims == ('z', 'x', 'y') + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_concat_fill_value(self, fill_value): + foo = DataArray([1, 2], coords=[('x', [1, 2])]) + bar = DataArray([1, 2], coords=[('x', [1, 3])]) + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + expected = DataArray([[1, 2, fill_value], [1, fill_value, 2]], + dims=['y', 'x'], coords={'x': [1, 2, 3]}) + actual = concat((foo, bar), dim='y', fill_value=fill_value) + assert_identical(actual, expected) + class TestAutoCombine: @@ -417,6 +445,20 @@ def test_auto_combine_no_concat(self): {'baz': [100]}) assert_identical(expected, actual) + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_auto_combine_fill_value(self, fill_value): + datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}), + Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})] + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + expected = Dataset({'a': (('t', 'x'), + [[fill_value, 2, 3], [1, 2, fill_value]])}, + {'x': [0, 1, 2]}) + actual = auto_combine(datasets, concat_dim='t', fill_value=fill_value) + assert_identical(expected, actual) + def assert_combined_tile_ids_equal(dict1, dict2): assert len(dict1) == len(dict2) From ae1239c58282336b311ee3a6f5d3f4ce5bacdb93 Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Tue, 28 May 2019 23:32:26 +0200 Subject: [PATCH 6/6] cfgrib is now part of conda-forge (#2992) --- ci/requirements-py36.yml | 3 +-- ci/requirements-py37.yml | 3 
+-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 03242426a36..d6dafd8d540 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -24,12 +24,11 @@ dependencies: - bottleneck - zarr - pseudonetcdf>=3.0.1 - - eccodes + - cfgrib>=0.9.2 - cdms2 - pynio - iris>=1.10 - pydap - lxml - pip: - - cfgrib>=0.9.2 - mypy==0.660 diff --git a/ci/requirements-py37.yml b/ci/requirements-py37.yml index 0cece4ed6dd..c5f5d71b8e5 100644 --- a/ci/requirements-py37.yml +++ b/ci/requirements-py37.yml @@ -25,9 +25,8 @@ dependencies: - bottleneck - zarr - pseudonetcdf>=3.0.1 + - cfgrib>=0.9.2 - lxml - - eccodes - pydap - pip: - - cfgrib>=0.9.2 - mypy==0.650
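
With cfgrib installable from conda-forge, the separate eccodes entry is dropped (the conda package presumably pulls in ecCodes itself) and the pip section shrinks to just mypy. For completeness, a one-line sketch of the backend this enables, assuming a valid GRIB file (the file name here is hypothetical):

>>> import xarray as xr
>>> ds = xr.open_dataset('example.grib', engine='cfgrib')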