Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Future warning for default reduction dimension of groupby #2366

Merged
merged 14 commits into from
Sep 28, 2018
10 changes: 10 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@ What's New
v0.11.0 (unreleased)
--------------------

Breaking changes
~~~~~~~~~~~~~~~~

- Reduction of :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample`
without dimension argument will change in the next release.
Now we warn a FutureWarning.
By `Keisuke Fujii <https://github.com/fujiisoup>`_.

Documentation
~~~~~~~~~~~~~
Enhancements
~~~~~~~~~~~~

Expand Down
2 changes: 2 additions & 0 deletions xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@
from . import tutorial
from . import ufuncs
from . import testing

from .core.common import ALL_DIMS
6 changes: 5 additions & 1 deletion xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
from . import duck_array_ops, dtypes, formatting, ops
from .arithmetic import SupportsArithmetic
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
from .utils import either_dict_or_kwargs, Frozen, SortedKeysDict
from .utils import either_dict_or_kwargs, Frozen, SortedKeysDict, ReprObject


# Used as a sentinel value to indicate a all dimensions
ALL_DIMS = ReprObject('<all-dims>')


class ImplementsArrayReduce(object):
Expand Down
11 changes: 7 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from .. import conventions
from .alignment import align
from .common import (
DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects)
ALL_DIMS, DataWithCoords, ImplementsDatasetReduce,
_contains_datetime_like_objects)
from .coordinates import (
DatasetCoordinates, Indexes, LevelCoordinatesSource,
assert_coordinate_consistent, remap_label_indexers)
Expand Down Expand Up @@ -743,7 +744,7 @@ def copy(self, deep=False, data=None):
Shallow copy versus deep copy

>>> da = xr.DataArray(np.random.randn(2, 3))
>>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])},
>>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])},
coords={'x': ['one', 'two']})
>>> ds.copy()
<xarray.Dataset>
Expand Down Expand Up @@ -775,7 +776,7 @@ def copy(self, deep=False, data=None):
foo (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003
bar (x) int64 -1 2

Changing the data using the ``data`` argument maintains the
Changing the data using the ``data`` argument maintains the
structure of the original object, but with the new data. Original
object is unaffected.

Expand Down Expand Up @@ -826,7 +827,7 @@ def copy(self, deep=False, data=None):
# skip __init__ to avoid costly validation
return self._construct_direct(variables, self._coord_names.copy(),
self._dims.copy(), self._attrs_copy(),
encoding=self.encoding)
encoding=self.encoding)

def _subset_with_all_valid_coords(self, variables, coord_names, attrs):
needed_dims = set()
Expand Down Expand Up @@ -2893,6 +2894,8 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
Dataset with this object's DataArrays replaced with new DataArrays
of summarized data and the indicated dimension(s) removed.
"""
if dim is ALL_DIMS:
dim = None
if isinstance(dim, basestring):
dims = set([dim])
elif dim is None:
Expand Down
64 changes: 62 additions & 2 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from __future__ import absolute_import, division, print_function

import functools
import warnings

import numpy as np
import pandas as pd

from . import dtypes, duck_array_ops, nputils, ops
from . import dtypes, duck_array_ops, nputils, ops, utils
from .arithmetic import SupportsArithmetic
from .combine import concat
from .common import ImplementsArrayReduce, ImplementsDatasetReduce
from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce
from .pycompat import integer_types, range, zip
from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index
from .variable import IndexVariable, Variable, as_variable
Expand Down Expand Up @@ -567,10 +568,39 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
Array with summarized data and the indicated dimension(s)
removed.
"""
if dim == DEFAULT_DIMS:
dim = ALL_DIMS
# TODO change this to dim = self._group_dim after
# the deprecation process
if self._obj.ndim > 1:
warnings.warn(
"Default reduction dimension will be changed to the "
"grouped dimension after xarray 0.12. To silence this "
"warning, pass dim=xarray.ALL_DIMS explicitly.",
FutureWarning, stacklevel=2)

def reduce_array(ar):
return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
return self.apply(reduce_array, shortcut=shortcut)

# TODO remove the following class method and DEFAULT_DIMS after the
# deprecation cycle
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None,
keep_attrs=False, **kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
skipna=skipna, allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None,
keep_attrs=False, **kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
allow_lazy=True, **kwargs)
return wrapped_func


DEFAULT_DIMS = utils.ReprObject('<default-dims>')

ops.inject_reduce_methods(DataArrayGroupBy)
ops.inject_binary_ops(DataArrayGroupBy)
Expand Down Expand Up @@ -649,10 +679,40 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
Array with summarized data and the indicated dimension(s)
removed.
"""
if dim == DEFAULT_DIMS:
dim = ALL_DIMS
# TODO change this to dim = self._group_dim after
# the deprecation process. Do not forget to remove _reduce_method
warnings.warn(
"Default reduction dimension will be changed to the "
"grouped dimension after xarray 0.12. To silence this "
"warning, pass dim=xarray.ALL_DIMS explicitly.",
FutureWarning, stacklevel=2)
elif dim is None:
dim = self._group_dim

def reduce_dataset(ds):
return ds.reduce(func, dim, keep_attrs, **kwargs)
return self.apply(reduce_dataset)

# TODO remove the following class method and DEFAULT_DIMS after the
# deprecation cycle
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
skipna=None, **kwargs):
return self.reduce(func, dim, keep_attrs, skipna=skipna,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
else:
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
**kwargs):
return self.reduce(func, dim, keep_attrs,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
return wrapped_func

def assign(self, **kwargs):
"""Assign data variables by group.

Expand Down
12 changes: 7 additions & 5 deletions xarray/core/resample.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, print_function

from . import ops
from .groupby import DataArrayGroupBy, DatasetGroupBy
from .groupby import DataArrayGroupBy, DatasetGroupBy, DEFAULT_DIMS
from .pycompat import OrderedDict, dask_array_type

RESAMPLE_DIM = '__resample_dim__'
Expand Down Expand Up @@ -277,15 +277,14 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
"""Reduce the items in this group by applying `func` along the
pre-defined resampling dimension.

Note that `dim` is by default here and ignored if passed by the user;
this ensures compatibility with the existing reduce interface.

Parameters
----------
func : function
Function which can be called in the form
`func(x, axis=axis, **kwargs)` to return the result of collapsing
an np.ndarray over an integer valued axis.
dim : str or sequence of str, optional
Dimension(s) over which to apply `func`.
keep_attrs : bool, optional
If True, the datasets's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
Expand All @@ -299,8 +298,11 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
Array with summarized data and the indicated dimension(s)
removed.
"""
if dim == DEFAULT_DIMS:
dim = None

return super(DatasetResample, self).reduce(
func, self._dim, keep_attrs, **kwargs)
func, dim, keep_attrs, **kwargs)

def _interpolate(self, kind='linear'):
"""Apply scipy.interpolate.interp1d along resampling dimension."""
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
Array with summarized data and the indicated dimension(s)
removed.
"""
if dim is common.ALL_DIMS:
dim = None
if dim is not None and axis is not None:
raise ValueError("cannot supply both 'axis' and 'dim' arguments")

Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,8 @@ def test_groupby(self):
u = self.eager_array
v = self.lazy_array

expected = u.groupby('x').mean()
actual = v.groupby('x').mean()
expected = u.groupby('x').mean(xr.ALL_DIMS)
actual = v.groupby('x').mean(xr.ALL_DIMS)
self.assertLazyAndAllClose(expected, actual)

def test_groupby_first(self):
Expand Down
36 changes: 29 additions & 7 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pickle
from copy import deepcopy
from distutils.version import LooseVersion
from textwrap import dedent
import warnings

Expand All @@ -14,7 +15,7 @@
DataArray, Dataset, IndexVariable, Variable, align, broadcast, set_options)
from xarray.convert import from_cdms2
from xarray.coding.times import CFDatetimeCoder, _import_cftime
from xarray.core.common import full_like
from xarray.core.common import full_like, ALL_DIMS
from xarray.core.pycompat import OrderedDict, iteritems
from xarray.tests import (
ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal,
Expand Down Expand Up @@ -2000,15 +2001,15 @@ def test_groupby_sum(self):
self.x[:, 10:].sum(),
self.x[:, 9:10].sum()]).T),
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
assert_allclose(expected_sum_all, grouped.reduce(np.sum))
assert_allclose(expected_sum_all, grouped.sum())
assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=ALL_DIMS))
assert_allclose(expected_sum_all, grouped.sum(ALL_DIMS))

expected = DataArray([array['y'].values[idx].sum() for idx
in [slice(9), slice(10, None), slice(9, 10)]],
[['a', 'b', 'c']], ['abc'])
actual = array['y'].groupby('abc').apply(np.sum)
assert_allclose(expected, actual)
actual = array['y'].groupby('abc').sum()
actual = array['y'].groupby('abc').sum(ALL_DIMS)
assert_allclose(expected, actual)

expected_sum_axis1 = Dataset(
Expand All @@ -2019,6 +2020,27 @@ def test_groupby_sum(self):
assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, 'y'))
assert_allclose(expected_sum_axis1, grouped.sum('y'))

def test_groupby_warning(self):
array = self.make_groupby_example_array()
grouped = array.groupby('y')
with pytest.warns(FutureWarning):
grouped.sum()

@pytest.mark.skipif(LooseVersion(xr.__version__) < LooseVersion('0.12'),
reason="not to forget the behavior change")
def test_groupby_sum_default(self):
array = self.make_groupby_example_array()
grouped = array.groupby('abc')

expected_sum_all = Dataset(
{'foo': Variable(['x', 'abc'],
np.array([self.x[:, :9].sum(axis=-1),
self.x[:, 10:].sum(axis=-1),
self.x[:, 9:10].sum(axis=-1)]).T),
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']

assert_allclose(expected_sum_all, grouped.sum())

def test_groupby_count(self):
array = DataArray(
[0, 0, np.nan, np.nan, 0, 0],
Expand Down Expand Up @@ -2099,9 +2121,9 @@ def test_groupby_math(self):
assert_identical(expected, actual)

grouped = array.groupby('abc')
expected_agg = (grouped.mean() - np.arange(3)).rename(None)
expected_agg = (grouped.mean(ALL_DIMS) - np.arange(3)).rename(None)
actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])])
actual_agg = actual.groupby('abc').mean()
actual_agg = actual.groupby('abc').mean(ALL_DIMS)
assert_allclose(expected_agg, actual_agg)

with raises_regex(TypeError, 'only support binary ops'):
Expand Down Expand Up @@ -2175,7 +2197,7 @@ def test_groupby_multidim(self):
('lon', DataArray([5, 28, 23],
coords=[('lon', [30., 40., 50.])])),
('lat', DataArray([16, 40], coords=[('lat', [10., 20.])]))]:
actual_sum = array.groupby(dim).sum()
actual_sum = array.groupby(dim).sum(ALL_DIMS)
assert_identical(expected_sum, actual_sum)

def test_groupby_multidim_apply(self):
Expand Down
23 changes: 15 additions & 8 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import xarray as xr
from xarray import (
DataArray, Dataset, IndexVariable, MergeError, Variable, align, backends,
broadcast, open_dataset, set_options)
broadcast, open_dataset, set_options, ALL_DIMS)
from xarray.core import indexing, npcompat, utils
from xarray.core.common import full_like
from xarray.core.pycompat import (
Expand Down Expand Up @@ -2648,20 +2648,28 @@ def test_groupby_reduce(self):

expected = data.mean('y')
expected['yonly'] = expected['yonly'].variable.set_dims({'x': 3})
actual = data.groupby('x').mean()
actual = data.groupby('x').mean(ALL_DIMS)
assert_allclose(expected, actual)

actual = data.groupby('x').mean('y')
assert_allclose(expected, actual)

letters = data['letters']
expected = Dataset({'xy': data['xy'].groupby(letters).mean(),
expected = Dataset({'xy': data['xy'].groupby(letters).mean(ALL_DIMS),
'xonly': (data['xonly'].mean().variable
.set_dims({'letters': 2})),
'yonly': data['yonly'].groupby(letters).mean()})
actual = data.groupby('letters').mean()
actual = data.groupby('letters').mean(ALL_DIMS)
assert_allclose(expected, actual)

def test_groupby_warn(self):
data = Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)),
'xonly': ('x', np.random.randn(3)),
'yonly': ('y', np.random.randn(4)),
'letters': ('y', ['a', 'a', 'b', 'b'])})
with pytest.warns(FutureWarning):
data.groupby('x').mean()

def test_groupby_math(self):
def reorder_dims(x):
return x.transpose('dim1', 'dim2', 'dim3', 'time')
Expand Down Expand Up @@ -2716,7 +2724,7 @@ def test_groupby_math_virtual(self):
ds = Dataset({'x': ('t', [1, 2, 3])},
{'t': pd.date_range('20100101', periods=3)})
grouped = ds.groupby('t.day')
actual = grouped - grouped.mean()
actual = grouped - grouped.mean(ALL_DIMS)
expected = Dataset({'x': ('t', [0, 0, 0])},
ds[['t', 't.day']])
assert_identical(actual, expected)
Expand All @@ -2725,18 +2733,17 @@ def test_groupby_nan(self):
# nan should be excluded from groupby
ds = Dataset({'foo': ('x', [1, 2, 3, 4])},
{'bar': ('x', [1, 1, 2, np.nan])})
actual = ds.groupby('bar').mean()
actual = ds.groupby('bar').mean(ALL_DIMS)
expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]})
assert_identical(actual, expected)

def test_groupby_order(self):
# groupby should preserve variables order

ds = Dataset()
for vn in ['a', 'b', 'c']:
ds[vn] = DataArray(np.arange(10), dims=['t'])
data_vars_ref = list(ds.data_vars.keys())
ds = ds.groupby('t').mean()
ds = ds.groupby('t').mean(ALL_DIMS)
data_vars = list(ds.data_vars.keys())
assert data_vars == data_vars_ref
# coords are now at the end of the list, so the test below fails
Expand Down