Skip to content

Commit

Permalink
Support keyword API for Dataset.drop (#3128)
Browse files Browse the repository at this point in the history
* Support for keyword argument-based dropping.

* Cherry picked changes.

* Moved noqa to correct location; added check for when labels are coordinates.

* Added okwarning to drop docs.

* Made unit tests more explicit. Added test for dropping along multiple dimensions.

* Used black.

* Fixed typo.

Co-Authored-By: Maximilian Roos <[email protected]>

* Docs for amended drop API.
  • Loading branch information
Gregory Gundersen authored and max-sixty committed Aug 18, 2019
1 parent 6083d3c commit d578bfe
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 16 deletions.
1 change: 1 addition & 0 deletions doc/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed
index labels along a dimension dropped:

.. ipython:: python
:okwarning:
ds.drop(['IN', 'IL'], dim='space')
Expand Down
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,17 @@ Enhancements

- Added ``join='override'``. This only checks that index sizes are equal among objects and skips
checking indexes for equality. By `Deepak Cherian <https://github.com/dcherian>`_.

- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg.
It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_.

- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
By `David Brochart <https://github.com/davidbrochart>`_.

- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`).
By `Gregory Gundersen <https://github.com/gwgundersen/>`_.

Bug fixes
~~~~~~~~~
- Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert
Expand Down
76 changes: 60 additions & 16 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
)
from .coordinates import (
DatasetCoordinates,
DataArrayCoordinates,
LevelCoordinatesSource,
assert_coordinate_consistent,
remap_label_indexers,
Expand Down Expand Up @@ -3450,7 +3451,7 @@ def _assert_all_in_dataset(
)

# Drop variables
@overload
@overload # noqa: F811
def drop(
self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
) -> "Dataset":
Expand All @@ -3463,7 +3464,9 @@ def drop(
) -> "Dataset":
...

def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811
def drop( # noqa: F811
self, labels=None, dim=None, *, errors="raise", **labels_kwargs
):
"""Drop variables or index labels from this dataset.
Parameters
Expand All @@ -3479,34 +3482,75 @@ def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811
any of the variable or index labels passed are not
in the dataset. If 'ignore', any given labels that are in the
dataset are dropped and no error is raised.
**labels_kwargs : {dim: label, ...}, optional
The keyword arguments form of ``dim`` and ``labels``.
Returns
-------
dropped : Dataset
Examples
--------
>>> data = np.random.randn(2, 3)
>>> labels = ['a', 'b', 'c']
>>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels})
>>> ds.drop(y=['a', 'c'])
<xarray.Dataset>
Dimensions: (x: 2, y: 1)
Coordinates:
* y (y) <U1 'b'
Dimensions without coordinates: x
Data variables:
A (x, y) float64 -0.3454 0.1734
>>> ds.drop(y='b')
<xarray.Dataset>
Dimensions: (x: 2, y: 2)
Coordinates:
* y (y) <U1 'a' 'c'
Dimensions without coordinates: x
Data variables:
A (x, y) float64 -0.3944 -1.418 1.423 -1.041
"""
if errors not in ["raise", "ignore"]:
raise ValueError('errors must be either "raise" or "ignore"')

if dim is None:
labels_are_coords = isinstance(labels, DataArrayCoordinates)
if labels_kwargs or (utils.is_dict_like(labels) and not labels_are_coords):
labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop")
if dim is not None:
raise ValueError("cannot specify dim and dict-like arguments.")
ds = self
for dim, labels in labels_kwargs.items():
ds = ds._drop_labels(labels, dim, errors=errors)
return ds
elif dim is None:
if isinstance(labels, str) or not isinstance(labels, Iterable):
labels = {labels}
else:
labels = set(labels)

return self._drop_vars(labels, errors=errors)
else:
# Don't cast to set, as it would harm performance when labels
# is a large numpy array
if utils.is_scalar(labels):
labels = [labels]
labels = np.asarray(labels)

try:
index = self.indexes[dim]
except KeyError:
raise ValueError("dimension %r does not have coordinate labels" % dim)
new_index = index.drop(labels, errors=errors)
return self.loc[{dim: new_index}]
if utils.is_list_like(labels):
warnings.warn(
"dropping dimensions using list-like labels is deprecated; "
"use dict-like arguments.",
DeprecationWarning,
stacklevel=2,
)
return self._drop_labels(labels, dim, errors=errors)

def _drop_labels(self, labels=None, dim=None, errors="raise"):
# Don't cast to set, as it would harm performance when labels
# is a large numpy array
if utils.is_scalar(labels):
labels = [labels]
labels = np.asarray(labels)
try:
index = self.indexes[dim]
except KeyError:
raise ValueError("dimension %r does not have coordinate labels" % dim)
new_index = index.drop(labels, errors=errors)
return self.loc[{dim: new_index}]

def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset":
if errors == "raise":
Expand Down
4 changes: 4 additions & 0 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ def is_full_slice(value: Any) -> bool:
return isinstance(value, slice) and value == slice(None)


def is_list_like(value: Any) -> bool:
return isinstance(value, list) or isinstance(value, tuple)


def either_dict_or_kwargs(
pos_kwargs: Optional[Mapping[Hashable, T]],
kw_kwargs: Mapping[str, T],
Expand Down
31 changes: 31 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2206,6 +2206,37 @@ def test_drop_index_labels(self):
with raises_regex(ValueError, "does not have coordinate labels"):
data.drop(1, "y")

def test_drop_labels_by_keyword(self):
# Tests for #2910: Support for a additional `drop()` API.
data = Dataset(
{"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
)
# Basic functionality.
assert len(data.coords["x"]) == 2

# This API is allowed but deprecated.
with pytest.warns(DeprecationWarning):
ds1 = data.drop(["a"], dim="x")
ds2 = data.drop(x="a")
ds3 = data.drop(x=["a"])
ds4 = data.drop(x=["a", "b"])
ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2))

assert_array_equal(ds1.coords["x"], ["b"])
assert_array_equal(ds2.coords["x"], ["b"])
assert_array_equal(ds3.coords["x"], ["b"])
assert ds4.coords["x"].size == 0
assert ds5.coords["x"].size == 0
assert_array_equal(ds5.coords["y"], [1, 3, 5])

# Error handling if user tries both approaches.
with pytest.raises(ValueError):
data.drop(labels=["a"], x="a")
with pytest.raises(ValueError):
data.drop(dim="x", x="a")
with pytest.raises(ValueError):
data.drop(labels=["a"], dim="x", x="a")

def test_drop_dims(self):
data = xr.Dataset(
{
Expand Down

0 comments on commit d578bfe

Please sign in to comment.