Skip to content

Commit

Permalink
Remove the old syntax for resample. (#2541)
Browse files Browse the repository at this point in the history
This has been deprecated since xarray 0.10.

I also added support for passing a mapping ``{dim: freq}`` as the first
argument.
  • Loading branch information
shoyer authored and max-sixty committed Nov 5, 2018
1 parent 38399cc commit 421be44
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 124 deletions.
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ Breaking changes
includes only data variables.
- ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks
array data, not coordinates.
- The old resample syntax from before xarray 0.10, e.g.,
``data.resample('1D', dim='time', how='mean')``, is no longer supported and will
raise an error in most cases. You need to use the new resample syntax
instead, e.g., ``data.resample(time='1D').mean()`` or
``data.resample({'time': '1D'}).mean()``.
- Xarray's storage backends now automatically open and close files when
necessary, rather than requiring opening a file with ``autoclose=True``. A
global least-recently-used cache is used to store open files; the default
Expand Down Expand Up @@ -111,6 +116,10 @@ Enhancements
python driver and *ecCodes* C-library. (:issue:`2475`)
By `Alessandro Amici <https://github.com/alexamici>`_,
sponsored by `ECMWF <https://github.com/ecmwf>`_.
- Resample now supports a dictionary mapping from dimension to frequency as
its first argument, e.g., ``data.resample({'time': '1D'}).mean()``. This is
consistent with other xarray functions that accept either dictionaries or
keyword arguments. By `Stephan Hoyer <https://github.com/shoyer>`_.

Bug fixes
~~~~~~~~~
Expand Down
94 changes: 21 additions & 73 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
Set the labels at the center of the window.
**dim_kwargs : optional
The keyword arguments form of ``dim``.
One of dim or dim_kwarg must be provided.
One of dim or dim_kwargs must be provided.
Returns
-------
Expand Down Expand Up @@ -591,15 +591,17 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
return self._rolling_cls(self, dim, min_periods=min_periods,
center=center)

def resample(self, freq=None, dim=None, how=None, skipna=None,
closed=None, label=None, base=0, keep_attrs=None, **indexer):
def resample(self, indexer=None, skipna=None, closed=None, label=None,
base=0, keep_attrs=None, **indexer_kwargs):
"""Returns a Resample object for performing resampling operations.
Handles both downsampling and upsampling. If any intervals contain no
values from the original object, they will be given the value ``NaN``.
Parameters
----------
indexer : {dim: freq}, optional
Mapping from the dimension name to resample frequency.
skipna : bool, optional
Whether to skip missing values when aggregating in downsampling.
closed : 'left' or 'right', optional
Expand All @@ -614,9 +616,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**indexer : {dim: freq}
Dictionary with a key indicating the dimension name to resample
over and a value corresponding to the resampling frequency.
**indexer_kwargs : {dim: freq}
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.
Returns
-------
Expand Down Expand Up @@ -664,30 +666,24 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

if dim is not None:
if how is None:
how = 'mean'
return self._resample_immediately(freq, dim, how, skipna, closed,
label, base, keep_attrs)
# note: the second argument (now 'skipna') used to be 'dim'
if ((skipna is not None and not isinstance(skipna, bool))
or ('how' in indexer_kwargs and 'how' not in self.dims)
or ('dim' in indexer_kwargs and 'dim' not in self.dims)):
raise TypeError('resample() no longer supports the `how` or '
'`dim` arguments. Instead call methods on resample '
"objects, e.g., data.resample(time='1D').mean()")

indexer = either_dict_or_kwargs(indexer, indexer_kwargs, 'resample')

if (how is not None) and indexer:
raise TypeError("If passing an 'indexer' then 'dim' "
"and 'how' should not be used")

# More than one indexer is ambiguous, but we do in fact need one if
# "dim" was not provided, until the old API is fully deprecated
if len(indexer) != 1:
raise ValueError(
"Resampling only supported along single dimensions."
)
dim, freq = indexer.popitem()

if isinstance(dim, basestring):
dim_name = dim
dim = self[dim]
else:
raise TypeError("Dimension name should be a string; "
"was passed %r" % dim)
dim_name = dim
dim_coord = self[dim]

if isinstance(self.indexes[dim_name], CFTimeIndex):
raise NotImplementedError(
Expand All @@ -702,63 +698,15 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
'errors.'
)

group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM)
group = DataArray(dim_coord, coords=dim_coord.coords,
dims=dim_coord.dims, name=RESAMPLE_DIM)
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
resampler = self._resample_cls(self, group=group, dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM)

return resampler

def _resample_immediately(self, freq, dim, how, skipna,
closed, label, base, keep_attrs):
"""Implement the original version of .resample() which immediately
executes the desired resampling operation. """
from .dataarray import DataArray
from ..coding.cftimeindex import CFTimeIndex

RESAMPLE_DIM = '__resample_dim__'

warnings.warn("\n.resample() has been modified to defer "
"calculations. Instead of passing 'dim' and "
"how=\"{how}\", instead consider using "
".resample({dim}=\"{freq}\").{how}('{dim}') ".format(
dim=dim, freq=freq, how=how),
FutureWarning, stacklevel=3)

if isinstance(self.indexes[dim], CFTimeIndex):
raise NotImplementedError(
'Resample is currently not supported along a dimension '
'indexed by a CFTimeIndex. For certain kinds of downsampling '
'it may be possible to work around this by converting your '
'time index to a DatetimeIndex using '
'CFTimeIndex.to_datetimeindex. Use caution when doing this '
'however, because switching to a DatetimeIndex from a '
'CFTimeIndex with a non-standard calendar entails a change '
'in the calendar type, which could lead to subtle and silent '
'errors.'
)

if isinstance(dim, basestring):
dim = self[dim]

group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM)
grouper = pd.Grouper(freq=freq, how=how, closed=closed, label=label,
base=base)
gb = self._groupby_cls(self, group, grouper=grouper)
if isinstance(how, basestring):
f = getattr(gb, how)
if how in ['first', 'last']:
result = f(skipna=skipna, keep_attrs=keep_attrs)
elif how == 'count':
result = f(dim=dim.name, keep_attrs=keep_attrs)
else:
result = f(dim=dim.name, skipna=skipna, keep_attrs=keep_attrs)
else:
result = gb.reduce(how, dim=dim.name, keep_attrs=keep_attrs)
result = result.rename({RESAMPLE_DIM: dim.name})
return result

def where(self, cond, other=dtypes.NA, drop=False):
"""Filter elements from this object according to a condition.
Expand Down
53 changes: 12 additions & 41 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2358,53 +2358,24 @@ def test_resample_drop_nondim_coords(self):
actual = array.resample(time="1H").interpolate('linear')
assert 'tc' not in actual.coords

def test_resample_old_vs_new_api(self):
def test_resample_keep_attrs(self):
times = pd.date_range('2000-01-01', freq='6H', periods=10)
array = DataArray(np.ones(10), [('time', times)])
array.attrs['meta'] = 'data'

# Simple mean
with pytest.warns(FutureWarning):
old_mean = array.resample('1D', 'time', how='mean')
new_mean = array.resample(time='1D').mean()
assert_identical(old_mean, new_mean)

# Mean, while keeping attributes
attr_array = array.copy()
attr_array.attrs['meta'] = 'data'

with pytest.warns(FutureWarning):
old_mean = attr_array.resample('1D', dim='time', how='mean',
keep_attrs=True)
new_mean = attr_array.resample(time='1D').mean(keep_attrs=True)
assert old_mean.attrs == new_mean.attrs
assert_identical(old_mean, new_mean)
result = array.resample(time='1D').mean(keep_attrs=True)
expected = DataArray([1, 1, 1], [('time', times[::4])],
attrs=array.attrs)
assert_identical(result, expected)

# Mean, with NaN to skip
nan_array = array.copy()
nan_array[1] = np.nan
def test_resample_skipna(self):
times = pd.date_range('2000-01-01', freq='6H', periods=10)
array = DataArray(np.ones(10), [('time', times)])
array[1] = np.nan

with pytest.warns(FutureWarning):
old_mean = nan_array.resample('1D', 'time', how='mean',
skipna=False)
new_mean = nan_array.resample(time='1D').mean(skipna=False)
result = array.resample(time='1D').mean(skipna=False)
expected = DataArray([np.nan, 1, 1], [('time', times[::4])])
assert_identical(old_mean, expected)
assert_identical(new_mean, expected)

# Try other common resampling methods
resampler = array.resample(time='1D')
for method in ['mean', 'median', 'sum', 'first', 'last', 'count']:
# Discard attributes on the call using the new api to match
# convention from old api
new_api = getattr(resampler, method)(keep_attrs=False)
with pytest.warns(FutureWarning):
old_api = array.resample('1D', dim='time', how=method)
assert_identical(new_api, old_api)
for method in [np.mean, np.sum, np.max, np.min]:
new_api = resampler.reduce(method)
with pytest.warns(FutureWarning):
old_api = array.resample('1D', dim='time', how=method)
assert_identical(new_api, old_api)
assert_identical(result, expected)

def test_upsample(self):
times = pd.date_range('2000-01-01', freq='6H', periods=5)
Expand Down
19 changes: 9 additions & 10 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2858,22 +2858,21 @@ def test_resample_drop_nondim_coords(self):
actual = ds.resample(time="1H").interpolate('linear')
assert 'tc' not in actual.coords

def test_resample_old_vs_new_api(self):
def test_resample_old_api(self):

times = pd.date_range('2000-01-01', freq='6H', periods=10)
ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
'bar': ('time', np.random.randn(10), {'meta': 'data'}),
'time': times})
ds.attrs['dsmeta'] = 'dsdata'

for method in ['mean', 'sum', 'count', 'first', 'last']:
resampler = ds.resample(time='1D')
# Discard attributes on the call using the new api to match
# convention from old api
new_api = getattr(resampler, method)(keep_attrs=False)
with pytest.warns(FutureWarning):
old_api = ds.resample('1D', dim='time', how=method)
assert_identical(new_api, old_api)
with raises_regex(TypeError, r'resample\(\) no longer supports'):
ds.resample('1D', 'time')

with raises_regex(TypeError, r'resample\(\) no longer supports'):
ds.resample('1D', dim='time', how='mean')

with raises_regex(TypeError, r'resample\(\) no longer supports'):
ds.resample('1D', dim='time')

def test_to_array(self):
ds = Dataset(OrderedDict([('a', 1), ('b', ('x', [1, 2, 3]))]),
Expand Down

0 comments on commit 421be44

Please sign in to comment.