From aadd0b2f528c54924d37cb43f4659c5ef96d2400 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Mon, 17 Jun 2019 09:56:35 -0400 Subject: [PATCH 1/6] Add "errors" keyword argument (GH2994) Adds an errors keyword to Dataset.drop(), Dataset.drop_dims(), and DataArray.drop() (GH2994). Consistent with pandas, the value can be either "raise" or "ignore" --- doc/whats-new.rst | 5 +++++ xarray/core/dataarray.py | 10 ++++++--- xarray/core/dataset.py | 37 +++++++++++++++++++++++++--------- xarray/tests/test_dataarray.py | 17 +++++++++++++++- xarray/tests/test_dataset.py | 35 ++++++++++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e62c7e87d44..ca50856a25e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,6 +59,11 @@ Enhancements formatted datetimes. By `Alan Brammer `_. - Add ``.str`` accessor to DataArrays for string related manipulations. By `0x0L `_. +- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims` + that allows ignoring errors if a passed label or dimension is not in the dataset + (:issue:`2994`). + By `Andrew Ross `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 094b8615880..119d9799696 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1461,7 +1461,7 @@ def transpose(self, *dims, transpose_coords=None) -> 'DataArray': def T(self) -> 'DataArray': return self.transpose() - def drop(self, labels, dim=None): + def drop(self, labels, dim=None, errors='raise'): """Drop coordinates or index labels from this DataArray. Parameters @@ -1471,14 +1471,18 @@ def drop(self, labels, dim=None): dim : str, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops coordinates rather than index labels. - + errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if + any of the variable or index labels passed are not + in the dataset. If ``ignore'', any given labels that are in the + dataset are dropped and no error is raised. Returns ------- dropped : DataArray """ if utils.is_scalar(labels): labels = [labels] - ds = self._to_temp_dataset().drop(labels, dim) + ds = self._to_temp_dataset().drop(labels, dim, errors=errors) return self._from_temp_dataset(ds) def dropna(self, dim, how='any', thresh=None): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ced1dba09e2..13a0c69260c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2823,7 +2823,7 @@ def _assert_all_in_dataset(self, names, virtual_okay=False): raise ValueError('One or more of the specified variables ' 'cannot be found in this dataset') - def drop(self, labels, dim=None): + def drop(self, labels, dim=None, errors='raise'): """Drop variables or index labels from this dataset. Parameters @@ -2833,33 +2833,41 @@ def drop(self, labels, dim=None): dim : None or str, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops variables rather than index labels. + errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if + any of the variable or index labels passed are not + in the dataset. If ``ignore'', any given labels that are in the + dataset are dropped and no error is raised. Returns ------- dropped : Dataset """ + if errors not in ['raise', 'ignore']: + raise ValueError('errors must be either "raise" or "ignore"') if utils.is_scalar(labels): labels = [labels] if dim is None: - return self._drop_vars(labels) + return self._drop_vars(labels, errors=errors) else: try: index = self.indexes[dim] except KeyError: raise ValueError( 'dimension %r does not have coordinate labels' % dim) - new_index = index.drop(labels) + new_index = index.drop(labels, errors=errors) return self.loc[{dim: new_index}] - def _drop_vars(self, names): - self._assert_all_in_dataset(names) + def _drop_vars(self, names, errors='raise'): + if errors == 'raise': + self._assert_all_in_dataset(names) drop = set(names) variables = OrderedDict((k, v) for k, v in self._variables.items() if k not in drop) coord_names = set(k for k in self._coord_names if k in variables) return self._replace_vars_and_dims(variables, coord_names) - def drop_dims(self, drop_dims): + def drop_dims(self, drop_dims, errors='raise'): """Drop dimensions and associated variables from this dataset. Parameters @@ -2872,14 +2880,23 @@ def drop_dims(self, drop_dims): obj : Dataset The dataset without the given dimensions (or any variables containing those dimensions) + errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if + any of the dimensions passed are not + in the dataset. If ``ignore'', any given dimensions that are in the + dataset are dropped and no error is raised. """ + if errors not in ['raise', 'ignore']: + raise ValueError('errors must be either "raise" or "ignore"') + if utils.is_scalar(drop_dims): drop_dims = [drop_dims] - missing_dimensions = [d for d in drop_dims if d not in self.dims] - if missing_dimensions: - raise ValueError('Dataset does not contain the dimensions: %s' - % missing_dimensions) + if errors == 'raise': + missing_dimensions = [d for d in drop_dims if d not in self.dims] + if missing_dimensions: + raise ValueError('Dataset does not contain the dimensions: %s' + % missing_dimensions) drop_vars = set(k for k, v in self._variables.items() for d in v.dims if d in drop_dims) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fd9076e7f65..69dff67271f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1859,19 +1859,34 @@ def test_drop_coordinates(self): with pytest.raises(ValueError): arr.drop('not found') + actual = expected.drop('not found', errors='ignore') + assert_identical(actual, expected) + with raises_regex(ValueError, 'cannot be found'): arr.drop(None) + actual = expected.drop(None, errors='ignore') + assert_identical(actual, expected) + renamed = arr.rename('foo') with raises_regex(ValueError, 'cannot be found'): renamed.drop('foo') + actual = renamed.drop('foo', errors='ignore') + assert_identical(actual, renamed) + def test_drop_index_labels(self): arr = DataArray(np.random.randn(2, 3), coords={'y': [0, 1, 2]}, dims=['x', 'y']) actual = arr.drop([0, 1], dim='y') expected = arr[:, 2:] - assert_identical(expected, actual) + assert_identical(actual, expected) + + with raises_regex(KeyError, 'not found'): + actual = arr.drop([0, 1, 3], dim='y') + + actual = arr.drop([0, 1, 3], dim='y', errors='ignore') + assert_identical(actual, expected) def test_dropna(self): x = np.random.randn(4, 4) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 812e2893db5..e8c07d3da0c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1889,6 +1889,15 @@ def test_drop_variables(self): with raises_regex(ValueError, 'cannot be found'): data.drop('not_found_here') + actual = data.drop('not_found_here', errors='ignore') + assert_identical(data, actual) + + actual = data.drop(['not_found_here'], errors='ignore') + assert_identical(data, actual) + + actual = data.drop(['time', 'not_found_here'], errors='ignore') + assert_identical(expected, actual) + def test_drop_index_labels(self): data = Dataset({'A': (['x', 'y'], np.random.randn(2, 3)), 'x': ['a', 'b']}) @@ -1907,6 +1916,16 @@ def test_drop_index_labels(self): # not contained in axis data.drop(['c'], dim='x') + actual = data.drop(['c'], dim='x', errors='ignore') + assert_identical(data, actual) + + with pytest.raises(ValueError): + data.drop(['c'], dim='x', errors='wrong_value') + + actual = data.drop(['a', 'b', 'c'], 'x', errors='ignore') + expected = data.isel(x=slice(0, 0)) + assert_identical(expected, actual) + with raises_regex( ValueError, 'does not have coordinate labels'): data.drop(1, 'y') @@ -1931,6 +1950,22 @@ def test_drop_dims(self): with pytest.raises((ValueError, KeyError)): data.drop_dims('z') # not a dimension + with pytest.raises((ValueError, KeyError)): + data.drop_dims(None) + + actual = data.drop_dims('z', errors='ignore') + assert_identical(data, actual) + + actual = data.drop_dims(None, errors='ignore') + assert_identical(data, actual) + + with pytest.raises(ValueError): + actual = data.drop_dims('z', errors='wrong_value') + + actual = data.drop_dims(['x', 'y', 'z'], errors='ignore') + expected = data.drop(['A', 'B', 'x']) + assert_identical(expected, actual) + def test_copy(self): data = create_test_data() data.attrs['Test'] = [1, 2, 3] From 8b441b825c73228f5f8edb716e42959029690ac2 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Mon, 17 Jun 2019 10:06:39 -0400 Subject: [PATCH 2/6] Fix quotes --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 119d9799696..1166ad9ab72 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1474,7 +1474,7 @@ def drop(self, labels, dim=None, errors='raise'): errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if any of the variable or index labels passed are not - in the dataset. If ``ignore'', any given labels that are in the + in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 13a0c69260c..b2d52696c14 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2836,7 +2836,7 @@ def drop(self, labels, dim=None, errors='raise'): errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if any of the variable or index labels passed are not - in the dataset. If ``ignore'', any given labels that are in the + in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. Returns @@ -2883,7 +2883,7 @@ def drop_dims(self, drop_dims, errors='raise'): errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if any of the dimensions passed are not - in the dataset. If ``ignore'', any given dimensions that are in the + in the dataset. If 'ignore', any given dimensions that are in the dataset are dropped and no error is raised. """ if errors not in ['raise', 'ignore']: From 9a651e46ac009f696688585ec08ce1b648abf3b4 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Mon, 17 Jun 2019 10:52:17 -0400 Subject: [PATCH 3/6] Different pandas versions raise different errors --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 69dff67271f..0bc0604877d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1882,7 +1882,7 @@ def test_drop_index_labels(self): expected = arr[:, 2:] assert_identical(actual, expected) - with raises_regex(KeyError, 'not found'): + with raises_regex((KeyError, ValueError), 'not found'): actual = arr.drop([0, 1, 3], dim='y') actual = arr.drop([0, 1, 3], dim='y', errors='ignore') From cecfba3ed3b668fa272a416ff61e4b49e9f235c0 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Mon, 17 Jun 2019 11:11:01 -0400 Subject: [PATCH 4/6] Error messages also vary --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0bc0604877d..a8825055479 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1882,7 +1882,7 @@ def test_drop_index_labels(self): expected = arr[:, 2:] assert_identical(actual, expected) - with raises_regex((KeyError, ValueError), 'not found'): + with raises_regex((KeyError, ValueError), 'not .* in axis'): actual = arr.drop([0, 1, 3], dim='y') actual = arr.drop([0, 1, 3], dim='y', errors='ignore') From 5a2f915253a3076305f24a13d2b29828beb89d13 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Tue, 18 Jun 2019 14:01:30 -0400 Subject: [PATCH 5/6] Correct doc for DataArray.drop; array, not dataset --- xarray/core/dataarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1166ad9ab72..58d888a8ac5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1473,9 +1473,9 @@ def drop(self, labels, dim=None, errors='raise'): ``dim is None``), drops coordinates rather than index labels. errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if - any of the variable or index labels passed are not - in the dataset. If 'ignore', any given labels that are in the - dataset are dropped and no error is raised. + any of the coordinates or index labels passed are not + in the array. If 'ignore', any given labels that are in the + array are dropped and no error is raised. Returns ------- dropped : DataArray From 51a22fa4bcc0904de63966cce24314806f0d4462 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Thu, 20 Jun 2019 10:53:26 -0400 Subject: [PATCH 6/6] Require errors argument to be passed with a keyword --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 58d888a8ac5..e560037a419 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1461,7 +1461,7 @@ def transpose(self, *dims, transpose_coords=None) -> 'DataArray': def T(self) -> 'DataArray': return self.transpose() - def drop(self, labels, dim=None, errors='raise'): + def drop(self, labels, dim=None, *, errors='raise'): """Drop coordinates or index labels from this DataArray. Parameters diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b2d52696c14..fac5699b734 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2823,7 +2823,7 @@ def _assert_all_in_dataset(self, names, virtual_okay=False): raise ValueError('One or more of the specified variables ' 'cannot be found in this dataset') - def drop(self, labels, dim=None, errors='raise'): + def drop(self, labels, dim=None, *, errors='raise'): """Drop variables or index labels from this dataset. Parameters @@ -2867,7 +2867,7 @@ def _drop_vars(self, names, errors='raise'): coord_names = set(k for k in self._coord_names if k in variables) return self._replace_vars_and_dims(variables, coord_names) - def drop_dims(self, drop_dims, errors='raise'): + def drop_dims(self, drop_dims, *, errors='raise'): """Drop dimensions and associated variables from this dataset. Parameters