diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8632d97be4b..2347d880350 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,11 @@ Breaking changes includes only data variables. - ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks array data, not coordinates. + - The old resample syntax from before xarray 0.10, e.g., + ``data.resample('1D', dim='time', how='mean')``, is no longer supported and will + raise an error in most cases. You need to use the new resample syntax + instead, e.g., ``data.resample(time='1D').mean()`` or + ``data.resample({'time': '1D'}).mean()``. - Xarray's storage backends now automatically open and close files when necessary, rather than requiring opening a file with ``autoclose=True``. A global least-recently-used cache is used to store open files; the default @@ -111,6 +116,10 @@ Enhancements python driver and *ecCodes* C-library. (:issue:`2475`) By `Alessandro Amici `_, sponsored by `ECMWF `_. +- Resample now supports a dictionary mapping from dimension to frequency as + its first argument, e.g., ``data.resample({'time': '1D'}).mean()``. This is + consistent with other xarray functions that accept either dictionaries or + keyword arguments. By `Stephan Hoyer `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index 508a19b7115..34057e3715d 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -548,7 +548,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs): Set the labels at the center of the window. **dim_kwargs : optional The keyword arguments form of ``dim``. - One of dim or dim_kwarg must be provided. + One of dim or dim_kwargs must be provided. 
Returns ------- @@ -591,8 +591,8 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs): return self._rolling_cls(self, dim, min_periods=min_periods, center=center) - def resample(self, freq=None, dim=None, how=None, skipna=None, - closed=None, label=None, base=0, keep_attrs=None, **indexer): + def resample(self, indexer=None, skipna=None, closed=None, label=None, + base=0, keep_attrs=None, **indexer_kwargs): """Returns a Resample object for performing resampling operations. Handles both downsampling and upsampling. If any intervals contain no @@ -600,6 +600,8 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, Parameters ---------- + indexer : {dim: freq}, optional + Mapping from the dimension name to resample frequency. skipna : bool, optional Whether to skip missing values when aggregating in downsampling. closed : 'left' or 'right', optional @@ -614,9 +616,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, If True, the object's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. - **indexer : {dim: freq} - Dictionary with a key indicating the dimension name to resample - over and a value corresponding to the resampling frequency. + **indexer_kwargs : {dim: freq} + The keyword arguments form of ``indexer``. + One of indexer or indexer_kwargs must be provided. 
Returns ------- @@ -664,30 +666,24 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not None: - if how is None: - how = 'mean' - return self._resample_immediately(freq, dim, how, skipna, closed, - label, base, keep_attrs) + # note: the second argument (now 'skipna') used to be 'dim' + if ((skipna is not None and not isinstance(skipna, bool)) + or ('how' in indexer_kwargs and 'how' not in self.dims) + or ('dim' in indexer_kwargs and 'dim' not in self.dims)): + raise TypeError('resample() no longer supports the `how` or ' + '`dim` arguments. Instead call methods on resample ' + "objects, e.g., data.resample(time='1D').mean()") + + indexer = either_dict_or_kwargs(indexer, indexer_kwargs, 'resample') - if (how is not None) and indexer: - raise TypeError("If passing an 'indexer' then 'dim' " - "and 'how' should not be used") - - # More than one indexer is ambiguous, but we do in fact need one if - # "dim" was not provided, until the old API is fully deprecated if len(indexer) != 1: raise ValueError( "Resampling only supported along single dimensions." ) dim, freq = indexer.popitem() - if isinstance(dim, basestring): - dim_name = dim - dim = self[dim] - else: - raise TypeError("Dimension name should be a string; " - "was passed %r" % dim) + dim_name = dim + dim_coord = self[dim] if isinstance(self.indexes[dim_name], CFTimeIndex): raise NotImplementedError( @@ -702,7 +698,8 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, 'errors.' 
) - group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM) + group = DataArray(dim_coord, coords=dim_coord.coords, + dims=dim_coord.dims, name=RESAMPLE_DIM) grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base) resampler = self._resample_cls(self, group=group, dim=dim_name, grouper=grouper, @@ -710,55 +707,6 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, return resampler - def _resample_immediately(self, freq, dim, how, skipna, - closed, label, base, keep_attrs): - """Implement the original version of .resample() which immediately - executes the desired resampling operation. """ - from .dataarray import DataArray - from ..coding.cftimeindex import CFTimeIndex - - RESAMPLE_DIM = '__resample_dim__' - - warnings.warn("\n.resample() has been modified to defer " - "calculations. Instead of passing 'dim' and " - "how=\"{how}\", instead consider using " - ".resample({dim}=\"{freq}\").{how}('{dim}') ".format( - dim=dim, freq=freq, how=how), - FutureWarning, stacklevel=3) - - if isinstance(self.indexes[dim], CFTimeIndex): - raise NotImplementedError( - 'Resample is currently not supported along a dimension ' - 'indexed by a CFTimeIndex. For certain kinds of downsampling ' - 'it may be possible to work around this by converting your ' - 'time index to a DatetimeIndex using ' - 'CFTimeIndex.to_datetimeindex. Use caution when doing this ' - 'however, because switching to a DatetimeIndex from a ' - 'CFTimeIndex with a non-standard calendar entails a change ' - 'in the calendar type, which could lead to subtle and silent ' - 'errors.' 
- ) - - if isinstance(dim, basestring): - dim = self[dim] - - group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM) - grouper = pd.Grouper(freq=freq, how=how, closed=closed, label=label, - base=base) - gb = self._groupby_cls(self, group, grouper=grouper) - if isinstance(how, basestring): - f = getattr(gb, how) - if how in ['first', 'last']: - result = f(skipna=skipna, keep_attrs=keep_attrs) - elif how == 'count': - result = f(dim=dim.name, keep_attrs=keep_attrs) - else: - result = f(dim=dim.name, skipna=skipna, keep_attrs=keep_attrs) - else: - result = gb.reduce(how, dim=dim.name, keep_attrs=keep_attrs) - result = result.rename({RESAMPLE_DIM: dim.name}) - return result - def where(self, cond, other=dtypes.NA, drop=False): """Filter elements from this object according to a condition. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2b35921fae8..87ee60715a1 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2358,53 +2358,24 @@ def test_resample_drop_nondim_coords(self): actual = array.resample(time="1H").interpolate('linear') assert 'tc' not in actual.coords - def test_resample_old_vs_new_api(self): + def test_resample_keep_attrs(self): times = pd.date_range('2000-01-01', freq='6H', periods=10) array = DataArray(np.ones(10), [('time', times)]) + array.attrs['meta'] = 'data' - # Simple mean - with pytest.warns(FutureWarning): - old_mean = array.resample('1D', 'time', how='mean') - new_mean = array.resample(time='1D').mean() - assert_identical(old_mean, new_mean) - - # Mean, while keeping attributes - attr_array = array.copy() - attr_array.attrs['meta'] = 'data' - - with pytest.warns(FutureWarning): - old_mean = attr_array.resample('1D', dim='time', how='mean', - keep_attrs=True) - new_mean = attr_array.resample(time='1D').mean(keep_attrs=True) - assert old_mean.attrs == new_mean.attrs - assert_identical(old_mean, new_mean) + result = array.resample(time='1D').mean(keep_attrs=True) + expected = 
DataArray([1, 1, 1], [('time', times[::4])], + attrs=array.attrs) + assert_identical(result, expected) - # Mean, with NaN to skip - nan_array = array.copy() - nan_array[1] = np.nan + def test_resample_skipna(self): + times = pd.date_range('2000-01-01', freq='6H', periods=10) + array = DataArray(np.ones(10), [('time', times)]) + array[1] = np.nan - with pytest.warns(FutureWarning): - old_mean = nan_array.resample('1D', 'time', how='mean', - skipna=False) - new_mean = nan_array.resample(time='1D').mean(skipna=False) + result = array.resample(time='1D').mean(skipna=False) expected = DataArray([np.nan, 1, 1], [('time', times[::4])]) - assert_identical(old_mean, expected) - assert_identical(new_mean, expected) - - # Try other common resampling methods - resampler = array.resample(time='1D') - for method in ['mean', 'median', 'sum', 'first', 'last', 'count']: - # Discard attributes on the call using the new api to match - # convention from old api - new_api = getattr(resampler, method)(keep_attrs=False) - with pytest.warns(FutureWarning): - old_api = array.resample('1D', dim='time', how=method) - assert_identical(new_api, old_api) - for method in [np.mean, np.sum, np.max, np.min]: - new_api = resampler.reduce(method) - with pytest.warns(FutureWarning): - old_api = array.resample('1D', dim='time', how=method) - assert_identical(new_api, old_api) + assert_identical(result, expected) def test_upsample(self): times = pd.date_range('2000-01-01', freq='6H', periods=5) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 8c0f8508df9..89ea3ba78a0 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2858,22 +2858,21 @@ def test_resample_drop_nondim_coords(self): actual = ds.resample(time="1H").interpolate('linear') assert 'tc' not in actual.coords - def test_resample_old_vs_new_api(self): + def test_resample_old_api(self): times = pd.date_range('2000-01-01', freq='6H', periods=10) ds = Dataset({'foo': (['time', 'x', 'y'], 
np.random.randn(10, 5, 3)), 'bar': ('time', np.random.randn(10), {'meta': 'data'}), 'time': times}) - ds.attrs['dsmeta'] = 'dsdata' - for method in ['mean', 'sum', 'count', 'first', 'last']: - resampler = ds.resample(time='1D') - # Discard attributes on the call using the new api to match - # convention from old api - new_api = getattr(resampler, method)(keep_attrs=False) - with pytest.warns(FutureWarning): - old_api = ds.resample('1D', dim='time', how=method) - assert_identical(new_api, old_api) + with raises_regex(TypeError, r'resample\(\) no longer supports'): + ds.resample('1D', 'time') + + with raises_regex(TypeError, r'resample\(\) no longer supports'): + ds.resample('1D', dim='time', how='mean') + + with raises_regex(TypeError, r'resample\(\) no longer supports'): + ds.resample('1D', dim='time') def test_to_array(self): ds = Dataset(OrderedDict([('a', 1), ('b', ('x', [1, 2, 3]))]),