From 79b3cdd3822c79ad2ee267f4d5082cd91c7f714c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 25 Oct 2019 11:15:46 -0400 Subject: [PATCH] change ALL_DIMS to equal ellipsis (#3418) * change ALL_DIMS to equal ... * changed references & added whatsnew * Update xarray/core/groupby.py Co-Authored-By: Deepak Cherian * Update xarray/core/groupby.py Co-Authored-By: Deepak Cherian * note in readme --- doc/examples/multidimensional-coords.rst | 2 +- doc/groupby.rst | 16 +++++++++++----- doc/whats-new.rst | 5 +++++ xarray/core/common.py | 4 ++-- xarray/core/dataset.py | 5 ++--- xarray/core/groupby.py | 16 ++++++++-------- xarray/core/variable.py | 2 +- xarray/tests/test_dask.py | 4 ++-- xarray/tests/test_dataarray.py | 14 +++++++------- xarray/tests/test_dataset.py | 13 ++++++------- xarray/tests/test_groupby.py | 6 +++--- xarray/tests/test_plot.py | 6 +++--- xarray/tests/test_sparse.py | 8 ++++---- 13 files changed, 55 insertions(+), 46 deletions(-) diff --git a/doc/examples/multidimensional-coords.rst b/doc/examples/multidimensional-coords.rst index a5084043977..55569b7662a 100644 --- a/doc/examples/multidimensional-coords.rst +++ b/doc/examples/multidimensional-coords.rst @@ -107,7 +107,7 @@ function to specify the output coordinates of the group. lat_center = np.arange(1, 90, 2) # group according to those bins and take the mean Tair_lat_mean = (ds.Tair.groupby_bins('xc', lat_bins, labels=lat_center) - .mean(xr.ALL_DIMS)) + .mean(...)) # plot the result @savefig xarray_multidimensional_coords_14_1.png width=5in Tair_lat_mean.plot(); diff --git a/doc/groupby.rst b/doc/groupby.rst index e1d88e289d2..52a27f4f160 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -116,7 +116,13 @@ dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(xr.ALL_DIMS) + ds.groupby('x').std(...) + +.. note:: + + We use an ellipsis (`...`) here to indicate we want to reduce over all + other dimensions + First and last ~~~~~~~~~~~~~~ @@ -127,7 +133,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(xr.ALL_DIMS) + ds.groupby('letters').first(...) By default, they skip missing values (control this with ``skipna``). @@ -142,7 +148,7 @@ coordinates. For example: .. ipython:: python - alt = arr.groupby('letters').mean(xr.ALL_DIMS) + alt = arr.groupby('letters').mean(...) alt ds.groupby('letters') - alt @@ -195,7 +201,7 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, dims=['ny','nx']) da - da.groupby('lon').sum(xr.ALL_DIMS) + da.groupby('lon').sum(...) da.groupby('lon').apply(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large @@ -213,4 +219,4 @@ applying your function, and then unstacking the result: .. ipython:: python stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(xr.ALL_DIMS).unstack('gridcell') + stacked.groupby('gridcell').sum(...).unstack('gridcell') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 12bed8f332e..ac60994d35b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,11 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use + `...` directly. As before, you can use this to instruct a `groupby` operation + to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest + using `...`. + By `Maximilian Roos `_ - Added integration tests against `pint `_. (:pull:`3238`) by `Justus Magin `_. diff --git a/xarray/core/common.py b/xarray/core/common.py index 1a8cf34ed39..d372115ea57 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,10 +25,10 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, ReprObject, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs # Used as a sentinel value to indicate a all dimensions -ALL_DIMS = ReprObject("") +ALL_DIMS = ... C = TypeVar("C") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index eba580f84bd..55ac0bc6135 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -49,7 +49,6 @@ ) from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .common import ( - ALL_DIMS, DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects, @@ -4037,7 +4036,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if dim is None or dim is ALL_DIMS: + if dim is None or dim is ...: dims = set(self.dims) elif isinstance(dim, str) or not isinstance(dim, Iterable): dims = {dim} @@ -5002,7 +5001,7 @@ def quantile( if isinstance(dim, str): dims = {dim} - elif dim is None or dim is ALL_DIMS: + elif dim in [None, ...]: dims = set(self.dims) else: dims = set(dim) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 52eb17df18d..68bd28ddb12 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -7,7 +7,7 @@ from . import dtypes, duck_array_ops, nputils, ops from .arithmetic import SupportsArithmetic -from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce +from .common import ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat from .formatting import format_array_flat from .options import _get_keep_attrs @@ -712,7 +712,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): q : float in range of [0,1] (or sequence of floats) Quantile to compute, which must be between 0 and 1 inclusive. - dim : xarray.ALL_DIMS, str or sequence of str, optional + dim : `...`, str or sequence of str, optional Dimension(s) over which to apply quantile. Defaults to the grouped dimension. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} @@ -769,7 +769,7 @@ def reduce( Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. - dim : xarray.ALL_DIMS, str or sequence of str, optional + dim : `...`, str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dimension' @@ -794,9 +794,9 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not ALL_DIMS and dim not in self.dims: + if dim is not ... and dim not in self.dims: raise ValueError( - "cannot reduce over dimension %r. expected either xarray.ALL_DIMS to reduce over all dimensions or one or more of %r." + "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." % (dim, self.dims) ) @@ -867,7 +867,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. - dim : xarray.ALL_DIMS, str or sequence of str, optional + dim : `...`, str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dimension' @@ -895,9 +895,9 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim is not ALL_DIMS and dim not in self.dims: + if dim is not ... and dim not in self.dims: raise ValueError( - "cannot reduce over dimension %r. expected either xarray.ALL_DIMS to reduce over all dimensions or one or more of %r." + "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." % (dim, self.dims) ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 37672cd82d9..93ad1eafb97 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1450,7 +1450,7 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. """ - if dim is common.ALL_DIMS: + if dim == ...: dim = None if dim is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dim' arguments") diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index ae8f43cb66d..50517ae3c9c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -435,8 +435,8 @@ def test_groupby(self): u = self.eager_array v = self.lazy_array - expected = u.groupby("x").mean(xr.ALL_DIMS) - actual = v.groupby("x").mean(xr.ALL_DIMS) + expected = u.groupby("x").mean(...) + actual = v.groupby("x").mean(...) self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a3a2f55f6cc..b13527bc098 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -13,7 +13,7 @@ from xarray.coding.times import CFDatetimeCoder from xarray.convert import from_cdms2 from xarray.core import dtypes -from xarray.core.common import ALL_DIMS, full_like +from xarray.core.common import full_like from xarray.tests import ( LooseVersion, ReturnItem, @@ -2443,8 +2443,8 @@ def test_groupby_sum(self): "abc": Variable(["abc"], np.array(["a", "b", "c"])), } )["foo"] - assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=ALL_DIMS)) - assert_allclose(expected_sum_all, grouped.sum(ALL_DIMS)) + assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=...)) + assert_allclose(expected_sum_all, grouped.sum(...)) expected = DataArray( [ @@ -2456,7 +2456,7 @@ def test_groupby_sum(self): ) actual = array["y"].groupby("abc").apply(np.sum) assert_allclose(expected, actual) - actual = array["y"].groupby("abc").sum(ALL_DIMS) + actual = array["y"].groupby("abc").sum(...) assert_allclose(expected, actual) expected_sum_axis1 = Dataset( @@ -2590,9 +2590,9 @@ def test_groupby_math(self): assert_identical(expected, actual) grouped = array.groupby("abc") - expected_agg = (grouped.mean(ALL_DIMS) - np.arange(3)).rename(None) + expected_agg = (grouped.mean(...) - np.arange(3)).rename(None) actual = grouped - DataArray(range(3), [("abc", ["a", "b", "c"])]) - actual_agg = actual.groupby("abc").mean(ALL_DIMS) + actual_agg = actual.groupby("abc").mean(...) assert_allclose(expected_agg, actual_agg) with raises_regex(TypeError, "only support binary ops"): @@ -2698,7 +2698,7 @@ def test_groupby_multidim(self): ("lon", DataArray([5, 28, 23], coords=[("lon", [30.0, 40.0, 50.0])])), ("lat", DataArray([16, 40], coords=[("lat", [10.0, 20.0])])), ]: - actual_sum = array.groupby(dim).sum(ALL_DIMS) + actual_sum = array.groupby(dim).sum(...) assert_identical(expected_sum, actual_sum) def test_groupby_multidim_apply(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 006d6881b5a..b3ffdf68e3f 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -11,7 +11,6 @@ import xarray as xr from xarray import ( - ALL_DIMS, DataArray, Dataset, IndexVariable, @@ -3327,7 +3326,7 @@ def test_groupby_reduce(self): expected = data.mean("y") expected["yonly"] = expected["yonly"].variable.set_dims({"x": 3}) - actual = data.groupby("x").mean(ALL_DIMS) + actual = data.groupby("x").mean(...) assert_allclose(expected, actual) actual = data.groupby("x").mean("y") @@ -3336,12 +3335,12 @@ def test_groupby_reduce(self): letters = data["letters"] expected = Dataset( { - "xy": data["xy"].groupby(letters).mean(ALL_DIMS), + "xy": data["xy"].groupby(letters).mean(...), "xonly": (data["xonly"].mean().variable.set_dims({"letters": 2})), "yonly": data["yonly"].groupby(letters).mean(), } ) - actual = data.groupby("letters").mean(ALL_DIMS) + actual = data.groupby("letters").mean(...) assert_allclose(expected, actual) def test_groupby_math(self): @@ -3404,14 +3403,14 @@ def test_groupby_math_virtual(self): {"x": ("t", [1, 2, 3])}, {"t": pd.date_range("20100101", periods=3)} ) grouped = ds.groupby("t.day") - actual = grouped - grouped.mean(ALL_DIMS) + actual = grouped - grouped.mean(...) expected = Dataset({"x": ("t", [0, 0, 0])}, ds[["t", "t.day"]]) assert_identical(actual, expected) def test_groupby_nan(self): # nan should be excluded from groupby ds = Dataset({"foo": ("x", [1, 2, 3, 4])}, {"bar": ("x", [1, 1, 2, np.nan])}) - actual = ds.groupby("bar").mean(ALL_DIMS) + actual = ds.groupby("bar").mean(...) expected = Dataset({"foo": ("bar", [1.5, 3]), "bar": [1, 2]}) assert_identical(actual, expected) @@ -3421,7 +3420,7 @@ def test_groupby_order(self): for vn in ["a", "b", "c"]: ds[vn] = DataArray(np.arange(10), dims=["t"]) data_vars_ref = list(ds.data_vars.keys()) - ds = ds.groupby("t").mean(ALL_DIMS) + ds = ds.groupby("t").mean(...) data_vars = list(ds.data_vars.keys()) assert data_vars == data_vars_ref # coords are now at the end of the list, so the test below fails diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index be494c4ae2b..a6de41beb66 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -147,11 +147,11 @@ def test_da_groupby_quantile(): [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], ) - actual_x = array.groupby("x").quantile(0, dim=xr.ALL_DIMS) + actual_x = array.groupby("x").quantile(0, dim=...) expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) assert_identical(expected_x, actual_x) - actual_y = array.groupby("y").quantile(0, dim=xr.ALL_DIMS) + actual_y = array.groupby("y").quantile(0, dim=...) expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) assert_identical(expected_y, actual_y) @@ -177,7 +177,7 @@ def test_da_groupby_quantile(): ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0, dim=xr.ALL_DIMS) + actual = g.quantile(0, dim=...) expected = xr.DataArray( [ 0.0, diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 3ac45a9720f..7deabd46eae 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -417,7 +417,7 @@ def test_convenient_facetgrid_4d(self): def test_coord_with_interval(self): bins = [-1, 0, 1, 2] - self.darray.groupby_bins("dim_0", bins).mean(xr.ALL_DIMS).plot() + self.darray.groupby_bins("dim_0", bins).mean(...).plot() class TestPlot1D(PlotTestCase): @@ -502,7 +502,7 @@ def test_step(self): def test_coord_with_interval_step(self): bins = [-1, 0, 1, 2] - self.darray.groupby_bins("dim_0", bins).mean(xr.ALL_DIMS).plot.step() + self.darray.groupby_bins("dim_0", bins).mean(...).plot.step() assert len(plt.gca().lines[0].get_xdata()) == ((len(bins) - 1) * 2) @@ -544,7 +544,7 @@ def test_plot_nans(self): def test_hist_coord_with_interval(self): ( self.darray.groupby_bins("dim_0", [-1, 0, 1, 2]) - .mean(xr.ALL_DIMS) + .mean(...) .plot.hist(range=(-1, 2)) ) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index bd26b96f6d4..73c4b9b8c74 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -756,8 +756,8 @@ def test_dot(self): def test_groupby(self): x1 = self.ds_xr x2 = self.sp_xr - m1 = x1.groupby("x").mean(xr.ALL_DIMS) - m2 = x2.groupby("x").mean(xr.ALL_DIMS) + m1 = x1.groupby("x").mean(...) + m2 = x2.groupby("x").mean(...) assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @@ -772,8 +772,8 @@ def test_groupby_first(self): def test_groupby_bins(self): x1 = self.ds_xr x2 = self.sp_xr - m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) - m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) + m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(...) + m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(...) assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense())