From 0e8debfe28286b5fe1f3d27e8dcc8466a62aca6d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 7 Nov 2019 15:13:50 -0500 Subject: [PATCH] drop_vars; deprecate drop for variables (#3475) * Deprecate drop for vars, in favor of drop_vars * docs tweaks * handle scalars as vars * allow warning in old whatsnew * add drop_sel, adjust deprecations based on comments * whatsnew * docs * old-whatsnew * docstring * pendingdeprecationwarning * whatsnew * whatsnew * move units tests to drop_sel * is_scalar (but retain isinstance for mypy) --- doc/data-structures.rst | 4 +- doc/indexing.rst | 6 +- doc/whats-new.rst | 7 ++ xarray/core/concat.py | 2 +- xarray/core/dataarray.py | 78 ++++++++---- xarray/core/dataset.py | 180 +++++++++++++++------------- xarray/core/groupby.py | 2 +- xarray/core/merge.py | 2 +- xarray/core/resample.py | 6 +- xarray/tests/test_backends.py | 8 +- xarray/tests/test_dask.py | 10 +- xarray/tests/test_dataarray.py | 45 +++---- xarray/tests/test_dataset.py | 115 +++++++++++------- xarray/tests/test_duck_array_ops.py | 3 +- xarray/tests/test_interp.py | 2 +- xarray/tests/test_plot.py | 6 +- xarray/tests/test_units.py | 6 +- 17 files changed, 286 insertions(+), 196 deletions(-) diff --git a/doc/data-structures.rst b/doc/data-structures.rst index d5567f4863e..93cdc7e9765 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -393,14 +393,14 @@ methods (like pandas) for transforming datasets into new objects. For removing variables, you can select and drop an explicit list of variables by indexing with a list of names or using the -:py:meth:`~xarray.Dataset.drop` methods to return a new ``Dataset``. These +:py:meth:`~xarray.Dataset.drop_vars` methods to return a new ``Dataset``. These operations keep around coordinates: .. ipython:: python ds[['temperature']] ds[['temperature', 'temperature_double']] - ds.drop('temperature') + ds.drop_vars('temperature') To remove a dimension, you can use :py:meth:`~xarray.Dataset.drop_dims` method. Any variables using that dimension are dropped: diff --git a/doc/indexing.rst b/doc/indexing.rst index 9ee8f1dddf8..ace960689a8 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -232,14 +232,14 @@ Using indexing to *assign* values to a subset of dataset (e.g., Dropping labels and dimensions ------------------------------ -The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed +The :py:meth:`~xarray.Dataset.drop_sel` method returns a new object with the listed index labels along a dimension dropped: .. ipython:: python - ds.drop(space=['IN', 'IL']) + ds.drop_sel(space=['IN', 'IL']) -``drop`` is both a ``Dataset`` and ``DataArray`` method. +``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. Use :py:meth:`~xarray.Dataset.drop_dims` to drop a full dimension from a Dataset. Any variables with these dimensions are also dropped: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dcaab011e67..0906058469d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,12 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. + :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for + dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible + option for dropping either lables or variables, but using the more specific methods is encouraged. + (:pull:`3475`) + By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) to represent all 'other' dimensions. For example, to move one dimension to the front, use `.transpose('x', ...)`. (:pull:`3421`) @@ -3752,6 +3758,7 @@ Enhancements explicitly listed variables or index labels: .. ipython:: python + :okwarning: # drop variables ds = xray.Dataset({'x': 0, 'y': 1}) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index c26153eb0d8..5b4fc078236 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -388,7 +388,7 @@ def ensure_common_dims(vars): result = result.set_coords(coord_names) result.encoding = result_encoding - result = result.drop(unlabeled_dims, errors="ignore") + result = result.drop_vars(unlabeled_dims, errors="ignore") if coord is not None: # add concat dimension last to ensure that its in the final Dataset diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 35ee90fb5c8..d2d37871ee9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -16,7 +16,6 @@ TypeVar, Union, cast, - overload, ) import numpy as np @@ -53,7 +52,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes from .options import OPTIONS -from .utils import Default, ReprObject, _default, _check_inplace, either_dict_or_kwargs +from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( IndexVariable, Variable, @@ -249,7 +248,7 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] + _accessors: Optional[Dict[str, Any]] # noqa _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] @@ -1890,41 +1889,72 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra def T(self) -> "DataArray": return self.transpose() - # Drop coords - @overload - def drop( - self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" + def drop_vars( + self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "DataArray": - ... + """Drop variables from this DataArray. + + Parameters + ---------- + names : hashable or iterable of hashables + Name(s) of variables to drop. + errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if any of the variable + passed are not in the dataset. If 'ignore', any given names that are in the + DataArray are dropped and no error is raised. + + Returns + ------- + dropped : Dataset + + """ + ds = self._to_temp_dataset().drop_vars(names, errors=errors) + return self._from_temp_dataset(ds) - # Drop index labels along dimension - @overload # noqa: F811 def drop( - self, labels: Any, dim: Hashable, *, errors: str = "raise" # array-like + self, + labels: Mapping = None, + dim: Hashable = None, + *, + errors: str = "raise", + **labels_kwargs, ) -> "DataArray": - ... + """Backward compatible method based on `drop_vars` and `drop_sel` - def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811 - """Drop coordinates or index labels from this DataArray. + Using either `drop_vars` or `drop_sel` is encouraged + """ + ds = self._to_temp_dataset().drop(labels, dim, errors=errors) + return self._from_temp_dataset(ds) + + def drop_sel( + self, + labels: Mapping[Hashable, Any] = None, + *, + errors: str = "raise", + **labels_kwargs, + ) -> "DataArray": + """Drop index labels from this DataArray. Parameters ---------- - labels : hashable or sequence of hashables - Name(s) of coordinates or index labels to drop. - If dim is not None, labels can be any array-like. - dim : hashable, optional - Dimension along which to drop index labels. By default (if - ``dim is None``), drops coordinates rather than index labels. + labels : Mapping[Hashable, Any] + Index labels to drop errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if - any of the coordinates or index labels passed are not - in the array. If 'ignore', any given labels that are in the - array are dropped and no error is raised. + any of the index labels passed are not + in the dataset. If 'ignore', any given labels that are in the + dataset are dropped and no error is raised. + **labels_kwargs : {dim: label, ...}, optional + The keyword arguments form of ``dim`` and ``labels`` + Returns ------- dropped : DataArray """ - ds = self._to_temp_dataset().drop(labels, dim, errors=errors) + if labels_kwargs or isinstance(labels, dict): + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") + + ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) def dropna( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 978242e5f6b..2cadc90334c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -25,7 +25,6 @@ TypeVar, Union, cast, - overload, ) import numpy as np @@ -80,6 +79,7 @@ hashable, is_dict_like, is_list_like, + is_scalar, maybe_wrap_array, ) from .variable import IndexVariable, Variable, as_variable, broadcast_variables @@ -3519,39 +3519,98 @@ def _assert_all_in_dataset( "cannot be found in this dataset" ) - # Drop variables - @overload # noqa: F811 - def drop( - self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" + def drop_vars( + self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "Dataset": - ... + """Drop variables from this dataset. - # Drop index labels along dimension - @overload # noqa: F811 - def drop( - self, labels: Any, dim: Hashable, *, errors: str = "raise" # array-like - ) -> "Dataset": - ... + Parameters + ---------- + names : hashable or iterable of hashables + Name(s) of variables to drop. + errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if any of the variable + passed are not in the dataset. If 'ignore', any given names that are in the + dataset are dropped and no error is raised. - def drop( # noqa: F811 - self, labels=None, dim=None, *, errors="raise", **labels_kwargs - ): - """Drop variables or index labels from this dataset. + Returns + ------- + dropped : Dataset + + """ + # the Iterable check is required for mypy + if is_scalar(names) or not isinstance(names, Iterable): + names = {names} + else: + names = set(names) + if errors == "raise": + self._assert_all_in_dataset(names) + + variables = {k: v for k, v in self._variables.items() if k not in names} + coord_names = {k for k in self._coord_names if k in variables} + indexes = {k: v for k, v in self.indexes.items() if k not in names} + return self._replace_with_new_dims( + variables, coord_names=coord_names, indexes=indexes + ) + + def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): + """Backward compatible method based on `drop_vars` and `drop_sel` + + Using either `drop_vars` or `drop_sel` is encouraged + """ + if errors not in ["raise", "ignore"]: + raise ValueError('errors must be either "raise" or "ignore"') + + if is_dict_like(labels) and not isinstance(labels, dict): + warnings.warn( + "dropping coordinates using `drop` is be deprecated; use drop_vars.", + FutureWarning, + stacklevel=2, + ) + return self.drop_vars(labels, errors=errors) + + if labels_kwargs or isinstance(labels, dict): + if dim is not None: + raise ValueError("cannot specify dim and dict-like arguments.") + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") + + if dim is None and (is_list_like(labels) or is_scalar(labels)): + warnings.warn( + "dropping variables using `drop` will be deprecated; using drop_vars is encouraged.", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.drop_vars(labels, errors=errors) + if dim is not None: + warnings.warn( + "dropping labels using list-like labels is deprecated; using " + "dict-like arguments with `drop_sel`, e.g. `ds.drop_sel(dim=[labels]).", + DeprecationWarning, + stacklevel=2, + ) + return self.drop_sel({dim: labels}, errors=errors, **labels_kwargs) + + warnings.warn( + "dropping labels using `drop` will be deprecated; using drop_sel is encouraged.", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.drop_sel(labels, errors=errors) + + def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): + """Drop index labels from this dataset. Parameters ---------- - labels : hashable or iterable of hashables - Name(s) of variables or index labels to drop. - dim : None or hashable, optional - Dimension along which to drop index labels. By default (if - ``dim is None``), drops variables rather than index labels. + labels : Mapping[Hashable, Any] + Index labels to drop errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if - any of the variable or index labels passed are not + any of the index labels passed are not in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. **labels_kwargs : {dim: label, ...}, optional - The keyword arguments form of ``dim`` and ``labels``. + The keyword arguments form of ``dim`` and ``labels` Returns ------- @@ -3562,7 +3621,7 @@ def drop( # noqa: F811 >>> data = np.random.randn(2, 3) >>> labels = ['a', 'b', 'c'] >>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) - >>> ds.drop(y=['a', 'c']) + >>> ds.drop_sel(y=['a', 'c']) Dimensions: (x: 2, y: 1) Coordinates: @@ -3570,7 +3629,7 @@ def drop( # noqa: F811 Dimensions without coordinates: x Data variables: A (x, y) float64 -0.3454 0.1734 - >>> ds.drop(y='b') + >>> ds.drop_sel(y='b') Dimensions: (x: 2, y: 2) Coordinates: @@ -3582,61 +3641,22 @@ def drop( # noqa: F811 if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - if is_dict_like(labels) and not isinstance(labels, dict): - warnings.warn( - "dropping coordinates using key values of dict-like labels is " - "deprecated; use drop_vars or a list of coordinates.", - FutureWarning, - stacklevel=2, - ) - if dim is not None and is_list_like(labels): - warnings.warn( - "dropping dimensions using list-like labels is deprecated; use " - "dict-like arguments.", - DeprecationWarning, - stacklevel=2, - ) + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if labels_kwargs or isinstance(labels, dict): - labels_kwargs = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if dim is not None: - raise ValueError("cannot specify dim and dict-like arguments.") - ds = self - for dim, labels in labels_kwargs.items(): - ds = ds._drop_labels(labels, dim, errors=errors) - return ds - elif dim is None: - if isinstance(labels, str) or not isinstance(labels, Iterable): - labels = {labels} - else: - labels = set(labels) - return self._drop_vars(labels, errors=errors) - else: - return self._drop_labels(labels, dim, errors=errors) - - def _drop_labels(self, labels=None, dim=None, errors="raise"): - # Don't cast to set, as it would harm performance when labels - # is a large numpy array - if utils.is_scalar(labels): - labels = [labels] - labels = np.asarray(labels) - try: - index = self.indexes[dim] - except KeyError: - raise ValueError("dimension %r does not have coordinate labels" % dim) - new_index = index.drop(labels, errors=errors) - return self.loc[{dim: new_index}] - - def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset": - if errors == "raise": - self._assert_all_in_dataset(names) - - variables = {k: v for k, v in self._variables.items() if k not in names} - coord_names = {k for k in self._coord_names if k in variables} - indexes = {k: v for k, v in self.indexes.items() if k not in names} - return self._replace_with_new_dims( - variables, coord_names=coord_names, indexes=indexes - ) + ds = self + for dim, labels_for_dim in labels.items(): + # Don't cast to set, as it would harm performance when labels + # is a large numpy array + if utils.is_scalar(labels_for_dim): + labels_for_dim = [labels_for_dim] + labels_for_dim = np.asarray(labels_for_dim) + try: + index = self.indexes[dim] + except KeyError: + raise ValueError("dimension %r does not have coordinate labels" % dim) + new_index = index.drop(labels_for_dim, errors=errors) + ds = ds.loc[{dim: new_index}] + return ds def drop_dims( self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" @@ -3679,7 +3699,7 @@ def drop_dims( ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} - return self._drop_vars(drop_vars) + return self.drop_vars(drop_vars) def transpose(self, *dims: Hashable) -> "Dataset": """Return a new Dataset object with all array dimensions transposed. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 209ac14184b..c8906e34737 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -775,7 +775,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): ) if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop("quantile") + out = out.drop_vars("quantile") return out def reduce( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index daf0c3b059f..10c7804d718 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -859,6 +859,6 @@ def dataset_update_method( if c not in value.dims and c in dataset.coords ] if coord_names: - other[key] = value.drop(coord_names) + other[key] = value.drop_vars(coord_names) return merge_core([dataset, other], priority_arg=1, indexes=dataset.indexes) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 998964273be..2cb1bd55e19 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -47,7 +47,7 @@ def _upsample(self, method, *args, **kwargs): if k == self._dim: continue if self._dim in v.dims: - self._obj = self._obj.drop(k) + self._obj = self._obj.drop_vars(k) if method == "asfreq": return self.mean(self._dim) @@ -146,7 +146,7 @@ def _interpolate(self, kind="linear"): dummy = self._obj.copy() for k, v in self._obj.coords.items(): if k != self._dim and self._dim in v.dims: - dummy = dummy.drop(k) + dummy = dummy.drop_vars(k) return dummy.interp( assume_sorted=True, method=kind, @@ -218,7 +218,7 @@ def apply(self, func, shortcut=False, args=(), **kwargs): # dimension, then we need to do so before we can rename the proxy # dimension we used. if self._dim in combined.coords: - combined = combined.drop(self._dim) + combined = combined.drop_vars(self._dim) if self._resample_dim in combined.dims: combined = combined.rename({self._resample_dim: self._dim}) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 9b000b82b03..de3a7eadab0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -800,7 +800,7 @@ def equals_latlon(obj): assert "coordinates" not in ds["lat"].attrs assert "coordinates" not in ds["lon"].attrs - modified = original.drop(["temp", "precip"]) + modified = original.drop_vars(["temp", "precip"]) with self.roundtrip(modified) as actual: assert_identical(actual, modified) with create_tmp_file() as tmp_file: @@ -2177,7 +2177,7 @@ def test_cross_engine_read_write_netcdf4(self): # Drop dim3, because its labels include strings. These appear to be # not properly read with python-netCDF4, which converts them into # unicode instead of leaving them as bytes. - data = create_test_data().drop("dim3") + data = create_test_data().drop_vars("dim3") data.attrs["foo"] = "bar" valid_engines = ["netcdf4", "h5netcdf"] for write_engine in valid_engines: @@ -2344,7 +2344,7 @@ def test_open_twice(self): def test_open_fileobj(self): # open in-memory datasets instead of local file paths - expected = create_test_data().drop("dim3") + expected = create_test_data().drop_vars("dim3") expected.attrs["foo"] = "bar" with create_tmp_file() as tmp_file: expected.to_netcdf(tmp_file, engine="h5netcdf") @@ -4190,7 +4190,7 @@ def test_open_dataarray_options(self): with create_tmp_file() as tmp: data.to_netcdf(tmp) - expected = data.drop("y") + expected = data.drop_vars("y") with open_dataarray(tmp, drop_variables=["y"]) as loaded: assert_identical(expected, loaded) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 34115b29b23..fa8ae9991d7 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1129,11 +1129,11 @@ def test_map_blocks_to_array(map_ds): [ lambda x: x, lambda x: x.to_dataset(), - lambda x: x.drop("x"), + lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), - # TODO: [lambda x: x.isel(x=1).drop("x"), map_da], + # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], ], ) def test_map_blocks_da_transformations(func, map_da): @@ -1147,9 +1147,9 @@ def test_map_blocks_da_transformations(func, map_da): "func", [ lambda x: x, - lambda x: x.drop("cxy"), - lambda x: x.drop("a"), - lambda x: x.drop("x"), + lambda x: x.drop_vars("cxy"), + lambda x: x.drop_vars("a"), + lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.rename({"a": "new1", "b": "new2"}), # TODO: [lambda x: x.isel(x=1)], diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2c823b0c20a..acfe684d220 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -906,7 +906,7 @@ def test_sel_dataarray(self): assert_array_equal(actual, da.isel(x=[0, 1, 2])) assert "new_dim" in actual.dims assert "new_dim" in actual.coords - assert_equal(actual["new_dim"].drop("x"), ind["new_dim"]) + assert_equal(actual["new_dim"].drop_vars("x"), ind["new_dim"]) def test_sel_invalid_slice(self): array = DataArray(np.arange(10), [("x", np.arange(10))]) @@ -1660,7 +1660,7 @@ def test_expand_dims_with_greater_dim_size(self): coords=expected_coords, dims=list(expected_coords.keys()), attrs={"key": "entry"}, - ).drop(["y", "dim_0"]) + ).drop_vars(["y", "dim_0"]) assert_identical(expected, actual) # Test with kwargs instead of passing dict to dim arg. @@ -1677,7 +1677,7 @@ def test_expand_dims_with_greater_dim_size(self): }, dims=["dim_1", "x", "dim_0"], attrs={"key": "entry"}, - ).drop("dim_0") + ).drop_vars("dim_0") assert_identical(other_way_expected, other_way) def test_set_index(self): @@ -1993,7 +1993,7 @@ def test_stack_unstack(self): ) pd.util.testing.assert_index_equal(a, b) - actual = orig.stack(z=["x", "y"]).unstack("z").drop(["x", "y"]) + actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) dims = ["a", "b", "c", "d", "e"] @@ -2001,11 +2001,11 @@ def test_stack_unstack(self): stacked = orig.stack(ab=["a", "b"], cd=["c", "d"]) unstacked = stacked.unstack(["ab", "cd"]) - roundtripped = unstacked.drop(["a", "b", "c", "d"]).transpose(*dims) + roundtripped = unstacked.drop_vars(["a", "b", "c", "d"]).transpose(*dims) assert_identical(orig, roundtripped) unstacked = stacked.unstack() - roundtripped = unstacked.drop(["a", "b", "c", "d"]).transpose(*dims) + roundtripped = unstacked.drop_vars(["a", "b", "c", "d"]).transpose(*dims) assert_identical(orig, roundtripped) def test_stack_unstack_decreasing_coordinate(self): @@ -2109,40 +2109,43 @@ def test_drop_coordinates(self): expected = DataArray(np.random.randn(2, 3), dims=["x", "y"]) arr = expected.copy() arr.coords["z"] = 2 - actual = arr.drop("z") + actual = arr.drop_vars("z") assert_identical(expected, actual) with pytest.raises(ValueError): - arr.drop("not found") + arr.drop_vars("not found") - actual = expected.drop("not found", errors="ignore") + actual = expected.drop_vars("not found", errors="ignore") assert_identical(actual, expected) with raises_regex(ValueError, "cannot be found"): - arr.drop("w") + arr.drop_vars("w") - actual = expected.drop("w", errors="ignore") + actual = expected.drop_vars("w", errors="ignore") assert_identical(actual, expected) renamed = arr.rename("foo") with raises_regex(ValueError, "cannot be found"): - renamed.drop("foo") + renamed.drop_vars("foo") - actual = renamed.drop("foo", errors="ignore") + actual = renamed.drop_vars("foo", errors="ignore") assert_identical(actual, renamed) def test_drop_index_labels(self): arr = DataArray(np.random.randn(2, 3), coords={"y": [0, 1, 2]}, dims=["x", "y"]) - actual = arr.drop([0, 1], dim="y") + actual = arr.drop_sel(y=[0, 1]) expected = arr[:, 2:] assert_identical(actual, expected) with raises_regex((KeyError, ValueError), "not .* in axis"): - actual = arr.drop([0, 1, 3], dim="y") + actual = arr.drop_sel(y=[0, 1, 3]) - actual = arr.drop([0, 1, 3], dim="y", errors="ignore") + actual = arr.drop_sel(y=[0, 1, 3], errors="ignore") assert_identical(actual, expected) + with pytest.warns(DeprecationWarning): + arr.drop([0, 1, 3], dim="y", errors="ignore") + def test_dropna(self): x = np.random.randn(4, 4) x[::2, 0] = np.nan @@ -3360,7 +3363,7 @@ def test_to_pandas(self): da = DataArray(np.random.randn(*shape), dims=dims) with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): @@ -3411,11 +3414,13 @@ def test_to_and_from_series(self): assert_array_equal(expected.index.values, actual.index.values) assert "foo" == actual.name # test roundtrip - assert_identical(self.dv, DataArray.from_series(actual).drop(["x", "y"])) + assert_identical(self.dv, DataArray.from_series(actual).drop_vars(["x", "y"])) # test name is None actual.name = None expected_da = self.dv.rename(None) - assert_identical(expected_da, DataArray.from_series(actual).drop(["x", "y"])) + assert_identical( + expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) + ) @requires_sparse def test_from_series_sparse(self): @@ -3478,7 +3483,7 @@ def test_to_and_from_dict(self): # and the most bare bones representation still roundtrips d = {"name": "foo", "dims": ("x", "y"), "data": array.values} - assert_identical(array.drop("x"), DataArray.from_dict(d)) + assert_identical(array.drop_vars("x"), DataArray.from_dict(d)) # missing a dims in the coords d = { diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b9fa20fab26..50e78c9f685 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -322,7 +322,7 @@ def __repr__(self): def test_info(self): ds = create_test_data(seed=123) - ds = ds.drop("dim3") # string type prints differently in PY2 vs PY3 + ds = ds.drop_vars("dim3") # string type prints differently in PY2 vs PY3 ds.attrs["unicode_attr"] = "ba®" ds.attrs["string_attr"] = "bar" @@ -509,7 +509,9 @@ def test_constructor_compat(self): {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]}, ) - actual = Dataset({"a": original["a"][:, 0], "b": original["a"][0].drop("x")}) + actual = Dataset( + {"a": original["a"][:, 0], "b": original["a"][0].drop_vars("x")} + ) assert_identical(expected, actual) data = {"x": DataArray(0, coords={"y": 3}), "y": ("z", [1, 1, 1])} @@ -775,9 +777,9 @@ def test_coords_set(self): one_coord.reset_coords("x") actual = all_coords.reset_coords("zzz", drop=True) - expected = all_coords.drop("zzz") + expected = all_coords.drop_vars("zzz") assert_identical(expected, actual) - expected = two_coords.drop("zzz") + expected = two_coords.drop_vars("zzz") assert_identical(expected, actual) def test_coords_to_dataset(self): @@ -954,7 +956,7 @@ def test_dask_is_lazy(self): ds.fillna(0) ds.rename({"dim1": "foobar"}) ds.set_coords("var1") - ds.drop("var1") + ds.drop_vars("var1") def test_isel(self): data = create_test_data() @@ -1097,7 +1099,7 @@ def test_isel_fancy(self): actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"]) assert "station" in actual.coords assert "station" in actual.dims - assert_identical(actual["station"].drop(["dim2"]), stations["station"]) + assert_identical(actual["station"].drop_vars(["dim2"]), stations["station"]) with raises_regex(ValueError, "conflicting values for "): data.isel( @@ -1123,7 +1125,7 @@ def test_isel_fancy(self): assert "dim2" in actual.coords assert "a" in actual["dim2"].dims - assert_identical(actual["a"].drop(["dim2"]), stations["a"]) + assert_identical(actual["a"].drop_vars(["dim2"]), stations["a"]) assert_identical(actual["b"], stations["b"]) expected_var1 = data["var1"].variable[ stations["dim1s"].variable, stations["dim2s"].variable @@ -1132,7 +1134,7 @@ def test_isel_fancy(self): stations["dim1s"].variable, stations["dim2s"].variable ] expected_var3 = data["var3"].variable[slice(None), stations["dim1s"].variable] - assert_equal(actual["a"].drop("dim2"), stations["a"]) + assert_equal(actual["a"].drop_vars("dim2"), stations["a"]) assert_array_equal(actual["var1"], expected_var1) assert_array_equal(actual["var2"], expected_var2) assert_array_equal(actual["var3"], expected_var3) @@ -1200,7 +1202,7 @@ def test_isel_dataarray(self): indexing_da = indexing_da < 3 actual = data.isel(dim2=indexing_da) assert_identical( - actual["dim2"].drop("non_dim").drop("non_dim2"), data["dim2"][:2] + actual["dim2"].drop_vars("non_dim").drop_vars("non_dim2"), data["dim2"][:2] ) assert_identical(actual["non_dim"], indexing_da["non_dim"][:2]) assert_identical(actual["non_dim2"], indexing_da["non_dim2"]) @@ -1286,8 +1288,10 @@ def test_sel_dataarray(self): expected = data.isel(dim2=[0, 1, 2]).rename({"dim2": "new_dim"}) assert "new_dim" in actual.dims assert "new_dim" in actual.coords - assert_equal(actual.drop("new_dim").drop("dim2"), expected.drop("new_dim")) - assert_equal(actual["new_dim"].drop("dim2"), ind["new_dim"]) + assert_equal( + actual.drop_vars("new_dim").drop_vars("dim2"), expected.drop_vars("new_dim") + ) + assert_equal(actual["new_dim"].drop_vars("dim2"), ind["new_dim"]) # with conflicted coordinate (silently ignored) ind = DataArray( @@ -1304,10 +1308,12 @@ def test_sel_dataarray(self): coords={"new_dim": ["a", "b", "c"], "dim2": 3}, ) actual = data.sel(dim2=ind) - assert_equal(actual["new_dim"].drop("dim2"), ind["new_dim"].drop("dim2")) + assert_equal( + actual["new_dim"].drop_vars("dim2"), ind["new_dim"].drop_vars("dim2") + ) expected = data.isel(dim2=[0, 1, 2]) expected["dim2"] = (("new_dim"), expected["dim2"].values) - assert_equal(actual["dim2"].drop("new_dim"), expected["dim2"]) + assert_equal(actual["dim2"].drop_vars("new_dim"), expected["dim2"]) assert actual["var1"].dims == ("dim1", "new_dim") # with non-dimensional coordinate @@ -1322,7 +1328,7 @@ def test_sel_dataarray(self): ) actual = data.sel(dim2=ind) expected = data.isel(dim2=[0, 1, 2]) - assert_equal(actual.drop("new_dim"), expected) + assert_equal(actual.drop_vars("new_dim"), expected) assert np.allclose(actual["new_dim"].values, ind["new_dim"].values) def test_sel_dataarray_mindex(self): @@ -1554,8 +1560,8 @@ def test_sel_fancy(self): expected_ary = data["foo"][[0, 1, 2], [0, 2, 1]] actual = data.sel(x=idx_x, y=idx_y) assert_array_equal(expected_ary, actual["foo"]) - assert_identical(actual["a"].drop("x"), idx_x["a"]) - assert_identical(actual["b"].drop("y"), idx_y["b"]) + assert_identical(actual["a"].drop_vars("x"), idx_x["a"]) + assert_identical(actual["b"].drop_vars("y"), idx_y["b"]) with pytest.raises(KeyError): data.sel(x=[2.5], y=[2.0], method="pad", tolerance=1e-3) @@ -2094,36 +2100,50 @@ def test_variable_indexing(self): def test_drop_variables(self): data = create_test_data() - assert_identical(data, data.drop([])) + assert_identical(data, data.drop_vars([])) expected = Dataset({k: data[k] for k in data.variables if k != "time"}) - actual = data.drop("time") + actual = data.drop_vars("time") assert_identical(expected, actual) - actual = data.drop(["time"]) + actual = data.drop_vars(["time"]) assert_identical(expected, actual) with raises_regex(ValueError, "cannot be found"): - data.drop("not_found_here") + data.drop_vars("not_found_here") + + actual = data.drop_vars("not_found_here", errors="ignore") + assert_identical(data, actual) + + actual = data.drop_vars(["not_found_here"], errors="ignore") + assert_identical(data, actual) + + actual = data.drop_vars(["time", "not_found_here"], errors="ignore") + assert_identical(expected, actual) + + # deprecated approach with `drop` works (straight copy paste from above) - actual = data.drop("not_found_here", errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop("not_found_here", errors="ignore") assert_identical(data, actual) - actual = data.drop(["not_found_here"], errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop(["not_found_here"], errors="ignore") assert_identical(data, actual) - actual = data.drop(["time", "not_found_here"], errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop(["time", "not_found_here"], errors="ignore") assert_identical(expected, actual) def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) with pytest.warns(DeprecationWarning): - actual = data.drop(["a"], "x") + actual = data.drop(["a"], dim="x") expected = data.isel(x=[1]) assert_identical(expected, actual) with pytest.warns(DeprecationWarning): - actual = data.drop(["a", "b"], "x") + actual = data.drop(["a", "b"], dim="x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) @@ -2147,30 +2167,30 @@ def test_drop_index_labels(self): # DataArrays as labels are a nasty corner case as they are not # Iterable[Hashable] - DataArray.__iter__ yields scalar DataArrays. - actual = data.drop(DataArray(["a", "b", "c"]), "x", errors="ignore") + actual = data.drop_sel(x=DataArray(["a", "b", "c"]), errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) + with pytest.warns(DeprecationWarning): + data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore") + assert_identical(expected, actual) with raises_regex(ValueError, "does not have coordinate labels"): - data.drop(1, "y") + data.drop_sel(y=1) def test_drop_labels_by_keyword(self): - # Tests for #2910: Support for a additional `drop()` API. data = Dataset( {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} ) # Basic functionality. assert len(data.coords["x"]) == 2 - # In the future, this will break. with pytest.warns(DeprecationWarning): ds1 = data.drop(["a"], dim="x") - ds2 = data.drop(x="a") - ds3 = data.drop(x=["a"]) - ds4 = data.drop(x=["a", "b"]) - ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2)) + ds2 = data.drop_sel(x="a") + ds3 = data.drop_sel(x=["a"]) + ds4 = data.drop_sel(x=["a", "b"]) + ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) - # In the future, this will result in different behavior. arr = DataArray(range(3), dims=["c"]) with pytest.warns(FutureWarning): data.drop(arr.coords) @@ -2187,10 +2207,11 @@ def test_drop_labels_by_keyword(self): # Error handling if user tries both approaches. with pytest.raises(ValueError): data.drop(labels=["a"], x="a") - with pytest.raises(ValueError): - data.drop(dim="x", x="a") with pytest.raises(ValueError): data.drop(labels=["a"], dim="x", x="a") + warnings.filterwarnings("ignore", r"\W*drop") + with pytest.raises(ValueError): + data.drop(dim="x", x="a") def test_drop_dims(self): data = xr.Dataset( @@ -2203,15 +2224,15 @@ def test_drop_dims(self): ) actual = data.drop_dims("x") - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) actual = data.drop_dims("y") - expected = data.drop("A") + expected = data.drop_vars("A") assert_identical(expected, actual) actual = data.drop_dims(["x", "y"]) - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) with pytest.raises((ValueError, KeyError)): @@ -2230,7 +2251,7 @@ def test_drop_dims(self): actual = data.drop_dims("z", errors="wrong_value") actual = data.drop_dims(["x", "y", "z"], errors="ignore") - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) def test_copy(self): @@ -2571,7 +2592,7 @@ def test_expand_dims_mixed_int_and_coords(self): original["x"].values * np.ones([4, 3, 3]), coords=dict(d=range(4), e=["l", "m", "n"], a=np.linspace(0, 1, 3)), dims=["d", "e", "a"], - ).drop("d"), + ).drop_vars("d"), "y": xr.DataArray( original["y"].values * np.ones([4, 3, 4, 3]), coords=dict( @@ -2581,7 +2602,7 @@ def test_expand_dims_mixed_int_and_coords(self): a=np.linspace(0, 1, 3), ), dims=["d", "e", "b", "a"], - ).drop("d"), + ).drop_vars("d"), }, coords={"c": np.linspace(0, 1, 5)}, ) @@ -3059,7 +3080,7 @@ def test_setitem_with_coords(self): np.arange(10), dims="dim3", coords={"numbers": ("dim3", np.arange(10))} ) expected = ds.copy() - expected["var3"] = other.drop("numbers") + expected["var3"] = other.drop_vars("numbers") actual = ds.copy() actual["var3"] = other assert_identical(expected, actual) @@ -4504,7 +4525,9 @@ def test_apply(self): actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) - assert_identical(data.apply(lambda x: x, keep_attrs=True), data.drop("time")) + assert_identical( + data.apply(lambda x: x, keep_attrs=True), data.drop_vars("time") + ) def scale(x, multiple=1): return multiple * x @@ -4514,7 +4537,7 @@ def scale(x, multiple=1): assert_identical(actual["numbers"], data["numbers"]) actual = data.apply(np.asarray) - expected = data.drop("time") # time is not used on a data var + expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) def make_example_math_dataset(self): @@ -4616,7 +4639,7 @@ def test_dataset_math_auto_align(self): assert_identical(expected, actual) actual = ds.isel(y=slice(1)) + ds.isel(y=slice(1, None)) - expected = 2 * ds.drop(ds.y, dim="y") + expected = 2 * ds.drop_sel(y=ds.y) assert_equal(actual, expected) actual = ds + ds[["bar"]] diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 9df2f167cf2..f678af2fec5 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -441,7 +441,8 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): ) expected = getattr(da, func)(dim=aggdim, skipna=skipna) assert_allclose( - actual.drop(list(actual.coords)), expected.drop(list(expected.coords)) + actual.drop_vars(list(actual.coords)), + expected.drop_vars(list(expected.coords)), ) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b9dc9a71acc..b93325d7eab 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -553,7 +553,7 @@ def test_datetime_single_string(): actual = da.interp(time="2000-01-01T12:00") expected = xr.DataArray(0.5) - assert_allclose(actual.drop("time"), expected) + assert_allclose(actual.drop_vars("time"), expected) @requires_cftime diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 7deabd46eae..6e283ea01da 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1837,7 +1837,11 @@ def test_default_labels(self): assert substring_in_axes(self.darray.name, ax) def test_test_empty_cell(self): - g = self.darray.isel(row=1).drop("row").plot(col="col", hue="hue", col_wrap=2) + g = ( + self.darray.isel(row=1) + .drop_vars("row") + .plot(col="col", hue="hue", col_wrap=2) + ) bottomright = g.axes[-1, -1] assert not bottomright.has_data() assert not bottomright.get_visible() diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 9d14104bb50..80063f8b4bc 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1093,7 +1093,7 @@ def test_content_manipulation(self, func, dtype): "func", ( pytest.param( - method("drop", labels=np.array([1, 5]), dim="x"), + method("drop_sel", labels=dict(x=np.array([1, 5]))), marks=pytest.mark.xfail( reason="selecting using incompatible units does not raise" ), @@ -1128,9 +1128,9 @@ def test_content_manipulation_with_units(self, func, unit, error, dtype): expected = attach_units( func(strip_units(data_array), **stripped_kwargs), - {"data": quantity.units if func.name == "drop" else unit, "x": x.units}, + {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units}, ) - if error is not None and func.name == "drop": + if error is not None and func.name == "drop_sel": with pytest.raises(error): func(data_array, **kwargs) else: