diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8729f586b55..a127cef84f9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,9 +65,15 @@ Enhancements ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. By `David Brochart `_. -- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`). +- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index + labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`). By `Gregory Gundersen `_. +- Added examples of :py:meth:`Dataset.set_index` and + :py:meth:`DataArray.set_index`, as well as more specific error messages + when the user passes invalid arguments (:issue:`3176`). + By `Gregory Gundersen `_. + Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b311a847790..52c11429e2b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1524,6 +1524,30 @@ def set_index( Another DataArray, with this data but replaced coordinates. Return None if inplace=True. + Examples + -------- + >>> arr = xr.DataArray(data=np.ones((2, 3)), + ... dims=['x', 'y'], + ... coords={'x': + ... range(2), 'y': + ... range(3), 'a': ('x', [3, 4]) + ... 
}) + >>> arr + <xarray.DataArray (x: 2, y: 3)> + array([[1., 1., 1.], + [1., 1., 1.]]) + Coordinates: + * x (x) int64 0 1 + * y (y) int64 0 1 2 + a (x) int64 3 4 + >>> arr.set_index(x='a') + <xarray.DataArray (x: 2, y: 3)> + array([[1., 1., 1.], + [1., 1., 1.]]) + Coordinates: + * x (x) int64 3 4 + * y (y) int64 0 1 2 + See Also -------- DataArray.reset_index diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d25f24c1e28..6a606fd0c31 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -198,6 +198,7 @@ def merge_indexes( """ vars_to_replace = {} # Dict[Any, Variable] vars_to_remove = [] # type: list + error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): if isinstance(var_names, str) or not isinstance(var_names, Sequence): @@ -207,7 +208,10 @@ def merge_indexes( current_index_variable = variables.get(dim) for n in var_names: - var = variables[n] + try: + var = variables[n] + except KeyError: + raise ValueError(error_msg.format(n)) if ( current_index_variable is not None and var.dims != current_index_variable.dims @@ -239,8 +243,11 @@ def merge_indexes( else: for n in var_names: + try: + var = variables[n] + except KeyError: + raise ValueError(error_msg.format(n)) names.append(n) - var = variables[n] cat = pd.Categorical(var.values, ordered=True) codes.append(cat.codes) levels.append(cat.categories) @@ -2952,6 +2959,33 @@ def set_index( obj : Dataset Another dataset, with this dataset's data but replaced coordinates. + Examples + -------- + >>> arr = xr.DataArray(data=np.ones((2, 3)), + ... dims=['x', 'y'], + ... coords={'x': + ... range(2), 'y': + ... range(3), 'a': ('x', [3, 4]) + ... 
}) + >>> ds = xr.Dataset({'v': arr}) + >>> ds + <xarray.Dataset> + Dimensions: (x: 2, y: 3) + Coordinates: + * x (x) int64 0 1 + * y (y) int64 0 1 2 + a (x) int64 3 4 + Data variables: + v (x, y) float64 1.0 1.0 1.0 1.0 1.0 1.0 + >>> ds.set_index(x='a') + <xarray.Dataset> + Dimensions: (x: 2, y: 3) + Coordinates: + * x (x) int64 3 4 + * y (y) int64 0 1 2 + Data variables: + v (x, y) float64 1.0 1.0 1.0 1.0 1.0 1.0 + See Also -------- Dataset.reset_index diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 18aed7a0156..506c437c2bf 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1716,6 +1716,11 @@ def test_set_index(self): with raises_regex(ValueError, "dimension mismatch"): array2d.set_index(x="level") + # Issue 3176: Ensure clear error message on key error. + with pytest.raises(ValueError) as excinfo: + obj.set_index(x="level_4") + assert str(excinfo.value) == "level_4 is not the name of an existing variable." + def test_reset_index(self): indexes = [self.mindex.get_level_values(n) for n in self.mindex.names] coords = {idx.name: ("x", idx) for idx in indexes} diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 1b78d139485..23bc2b47e43 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2712,6 +2712,11 @@ def test_set_index(self): expected = Dataset(coords={"x": [0, 1, 2]}) assert_identical(ds.set_index(x="x_var"), expected) + # Issue 3176: Ensure clear error message on key error. + with pytest.raises(ValueError) as excinfo: + ds.set_index(foo="bar") + assert str(excinfo.value) == "bar is not the name of an existing variable." + def test_reset_index(self): ds = create_test_multiindex() mindex = ds["x"].to_index()