Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "errors" keyword argument to drop() and drop_dims() (#2994) #3028

Merged
merged 6 commits into from
Jun 20, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ Enhancements
formatted datetimes. By `Alan Brammer <https://github.com/abrammer>`_.
- Add ``.str`` accessor to DataArrays for string related manipulations.
By `0x0L <https://github.com/0x0L>`_.
- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims`
that allows ignoring errors if a passed label or dimension is not in the dataset
(:issue:`2994`).
By `Andrew Ross <https://github.com/andrew-c-ross>`_.


Bug fixes
~~~~~~~~~
Expand Down
10 changes: 7 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,7 @@ def transpose(self, *dims, transpose_coords=None) -> 'DataArray':
def T(self) -> 'DataArray':
return self.transpose()

def drop(self, labels, dim=None):
def drop(self, labels, dim=None, errors='raise'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make this new argument require using a keyword argument:

Suggested change
def drop(self, labels, dim=None, errors='raise'):
def drop(self, labels, dim=None, *, errors='raise'):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I'll add this to the methods for both Dataset and DataArray, since they take the same arguments.

Does it make sense to also add this to Dataset.drop_dims()? It is similar but takes no other keyword arguments:
def drop_dims(self, drop_dims, errors='raise'):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, let's make that keyword argument only, too.

"""Drop coordinates or index labels from this DataArray.

Parameters
Expand All @@ -1471,14 +1471,18 @@ def drop(self, labels, dim=None):
dim : str, optional
Dimension along which to drop index labels. By default (if
``dim is None``), drops coordinates rather than index labels.

errors : {'raise', 'ignore'}, optional
If 'raise' (default), raises a ValueError if
any of the coordinates or index labels passed are not
in the array. If 'ignore', any given labels that are in the
array are dropped and no error is raised.
Returns
-------
dropped : DataArray
"""
if utils.is_scalar(labels):
labels = [labels]
ds = self._to_temp_dataset().drop(labels, dim)
ds = self._to_temp_dataset().drop(labels, dim, errors=errors)
return self._from_temp_dataset(ds)

def dropna(self, dim, how='any', thresh=None):
Expand Down
37 changes: 27 additions & 10 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2823,7 +2823,7 @@ def _assert_all_in_dataset(self, names, virtual_okay=False):
raise ValueError('One or more of the specified variables '
'cannot be found in this dataset')

def drop(self, labels, dim=None):
def drop(self, labels, dim=None, errors='raise'):
"""Drop variables or index labels from this dataset.

Parameters
Expand All @@ -2833,33 +2833,41 @@ def drop(self, labels, dim=None):
dim : None or str, optional
Dimension along which to drop index labels. By default (if
``dim is None``), drops variables rather than index labels.
errors : {'raise', 'ignore'}, optional
If 'raise' (default), raises a ValueError if
any of the variable or index labels passed are not
in the dataset. If 'ignore', any given labels that are in the
dataset are dropped and no error is raised.

Returns
-------
dropped : Dataset
"""
if errors not in ['raise', 'ignore']:
raise ValueError('errors must be either "raise" or "ignore"')
if utils.is_scalar(labels):
labels = [labels]
if dim is None:
return self._drop_vars(labels)
return self._drop_vars(labels, errors=errors)
else:
try:
index = self.indexes[dim]
except KeyError:
raise ValueError(
'dimension %r does not have coordinate labels' % dim)
new_index = index.drop(labels)
new_index = index.drop(labels, errors=errors)
return self.loc[{dim: new_index}]

def _drop_vars(self, names):
self._assert_all_in_dataset(names)
def _drop_vars(self, names, errors='raise'):
if errors == 'raise':
self._assert_all_in_dataset(names)
drop = set(names)
variables = OrderedDict((k, v) for k, v in self._variables.items()
if k not in drop)
coord_names = set(k for k in self._coord_names if k in variables)
return self._replace_vars_and_dims(variables, coord_names)

def drop_dims(self, drop_dims):
def drop_dims(self, drop_dims, errors='raise'):
"""Drop dimensions and associated variables from this dataset.

Parameters
Expand All @@ -2872,14 +2880,23 @@ def drop_dims(self, drop_dims):
obj : Dataset
The dataset without the given dimensions (or any variables
containing those dimensions)
errors : {'raise', 'ignore'}, optional
If 'raise' (default), raises a ValueError if
any of the dimensions passed are not
in the dataset. If 'ignore', any given dimensions that are in the
dataset are dropped and no error is raised.
"""
if errors not in ['raise', 'ignore']:
raise ValueError('errors must be either "raise" or "ignore"')

if utils.is_scalar(drop_dims):
drop_dims = [drop_dims]

missing_dimensions = [d for d in drop_dims if d not in self.dims]
if missing_dimensions:
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)
if errors == 'raise':
missing_dimensions = [d for d in drop_dims if d not in self.dims]
if missing_dimensions:
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)

drop_vars = set(k for k, v in self._variables.items()
for d in v.dims if d in drop_dims)
Expand Down
17 changes: 16 additions & 1 deletion xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,19 +1859,34 @@ def test_drop_coordinates(self):
with pytest.raises(ValueError):
arr.drop('not found')

actual = expected.drop('not found', errors='ignore')
assert_identical(actual, expected)

with raises_regex(ValueError, 'cannot be found'):
arr.drop(None)

actual = expected.drop(None, errors='ignore')
assert_identical(actual, expected)

renamed = arr.rename('foo')
with raises_regex(ValueError, 'cannot be found'):
renamed.drop('foo')

actual = renamed.drop('foo', errors='ignore')
assert_identical(actual, renamed)

def test_drop_index_labels(self):
arr = DataArray(np.random.randn(2, 3), coords={'y': [0, 1, 2]},
dims=['x', 'y'])
actual = arr.drop([0, 1], dim='y')
expected = arr[:, 2:]
assert_identical(expected, actual)
assert_identical(actual, expected)

with raises_regex((KeyError, ValueError), 'not .* in axis'):
actual = arr.drop([0, 1, 3], dim='y')

actual = arr.drop([0, 1, 3], dim='y', errors='ignore')
assert_identical(actual, expected)

def test_dropna(self):
x = np.random.randn(4, 4)
Expand Down
35 changes: 35 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,15 @@ def test_drop_variables(self):
with raises_regex(ValueError, 'cannot be found'):
data.drop('not_found_here')

actual = data.drop('not_found_here', errors='ignore')
assert_identical(data, actual)

actual = data.drop(['not_found_here'], errors='ignore')
assert_identical(data, actual)

actual = data.drop(['time', 'not_found_here'], errors='ignore')
assert_identical(expected, actual)

def test_drop_index_labels(self):
data = Dataset({'A': (['x', 'y'], np.random.randn(2, 3)),
'x': ['a', 'b']})
Expand All @@ -1907,6 +1916,16 @@ def test_drop_index_labels(self):
# not contained in axis
data.drop(['c'], dim='x')

actual = data.drop(['c'], dim='x', errors='ignore')
assert_identical(data, actual)

with pytest.raises(ValueError):
data.drop(['c'], dim='x', errors='wrong_value')

actual = data.drop(['a', 'b', 'c'], 'x', errors='ignore')
expected = data.isel(x=slice(0, 0))
assert_identical(expected, actual)

with raises_regex(
ValueError, 'does not have coordinate labels'):
data.drop(1, 'y')
Expand All @@ -1931,6 +1950,22 @@ def test_drop_dims(self):
with pytest.raises((ValueError, KeyError)):
data.drop_dims('z') # not a dimension

with pytest.raises((ValueError, KeyError)):
data.drop_dims(None)

actual = data.drop_dims('z', errors='ignore')
assert_identical(data, actual)

actual = data.drop_dims(None, errors='ignore')
assert_identical(data, actual)

with pytest.raises(ValueError):
actual = data.drop_dims('z', errors='wrong_value')

actual = data.drop_dims(['x', 'y', 'z'], errors='ignore')
expected = data.drop(['A', 'B', 'x'])
assert_identical(expected, actual)

def test_copy(self):
data = create_test_data()
data.attrs['Test'] = [1, 2, 3]
Expand Down