Skip to content

Commit

Permalink
Consistently report all dimensions in error messages if invalid dimen…
Browse files Browse the repository at this point in the history
…sions are given (#8079)

* Show dims and coords in idxmin/idxmax error message if an invalid dim is given

* Show data dims in error messages of Dataset and update tests

Remove _assert_empty, not used anymore

* Update test for dataarray

* Show data dims in error messages of weighted and update test

* Show dimensions in error message of group_indexers_by_index

* List coordinates in concat error message, update test

* List coordinates in coords __delitem__ error message, update tests

* Show list of names in error message of PandasMultiIndex.sel, update test

* Show list of dimensions in error messages of Rolling and Coarsen, update tests

* Show dims in Variable.concat error message as tuple for consistency

* Change 'dataset' to 'data' in error messages

* Update whats-new

---------

Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
mgunyho and dcherian authored Sep 9, 2023
1 parent aea9af0 commit 0afbd45
Show file tree
Hide file tree
Showing 18 changed files with 177 additions and 69 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ Documentation
Internal Changes
~~~~~~~~~~~~~~~~

- Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`).
By `András Gunyhó <https://github.com/mgunyho>`_.

.. _whats-new.2023.08.0:

Expand Down
8 changes: 6 additions & 2 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2046,9 +2046,13 @@ def _calc_idxminmax(
raise ValueError("Must supply 'dim' argument for multidimensional arrays")

if dim not in array.dims:
raise KeyError(f'Dimension "{dim}" not in dimension')
raise KeyError(
f"Dimension {dim!r} not found in array dimensions {array.dims!r}"
)
if dim not in array.coords:
raise KeyError(f'Dimension "{dim}" does not have coordinates')
raise KeyError(
f"Dimension {dim!r} is not one of the coordinates {tuple(array.coords.keys())}"
)

# These are dtypes with NaN values argmin and argmax can handle
na_dtypes = "cfO"
Expand Down
13 changes: 8 additions & 5 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,17 +391,20 @@ def process_subset_opt(opt, subset):
else:
raise ValueError(f"unexpected value for {subset}: {opt}")
else:
invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)]
valid_vars = tuple(getattr(datasets[0], subset))
invalid_vars = [k for k in opt if k not in valid_vars]
if invalid_vars:
if subset == "coords":
raise ValueError(
"some variables in coords are not coordinates on "
f"the first dataset: {invalid_vars}"
f"the variables {invalid_vars} in coords are not "
f"found in the coordinates of the first dataset {valid_vars}"
)
else:
# note: data_vars are not listed in the error message here,
# because there may be lots of them
raise ValueError(
"some variables in data_vars are not data variables "
f"on the first dataset: {invalid_vars}"
f"the variables {invalid_vars} in data_vars are not "
f"found in the data variables of the first dataset"
)
concat_over.update(opt)

Expand Down
8 changes: 6 additions & 2 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,9 @@ def __delitem__(self, key: Hashable) -> None:
if key in self:
del self._data[key]
else:
raise KeyError(f"{key!r} is not a coordinate variable.")
raise KeyError(
f"{key!r} is not in coordinate variables {tuple(self.keys())}"
)

def _ipython_key_completions_(self):
"""Provide method for the key-autocompletions in IPython."""
Expand Down Expand Up @@ -855,7 +857,9 @@ def to_dataset(self) -> Dataset:

def __delitem__(self, key: Hashable) -> None:
if key not in self:
raise KeyError(f"{key!r} is not a coordinate variable.")
raise KeyError(
f"{key!r} is not in coordinate variables {tuple(self.keys())}"
)
assert_no_index_corrupted(self._data.xindexes, {key})

del self._data._coords[key]
Expand Down
69 changes: 42 additions & 27 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,6 @@ def _get_virtual_variable(
return ref_name, var_name, virtual_var


def _assert_empty(args: tuple, msg: str = "%s") -> None:
if args:
raise ValueError(msg % args)


def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint):
"""
Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
Expand Down Expand Up @@ -2640,7 +2635,7 @@ def chunk(
bad_dims = chunks.keys() - self.dims.keys()
if bad_dims:
raise ValueError(
f"some chunks keys are not dimensions on this object: {bad_dims}"
f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}"
)

chunkmanager = guess_chunkmanager(chunked_array_type)
Expand Down Expand Up @@ -4243,8 +4238,8 @@ def rename_dims(
for k, v in dims_dict.items():
if k not in self.dims:
raise ValueError(
f"cannot rename {k!r} because it is not a "
"dimension in this dataset"
f"cannot rename {k!r} because it is not found "
f"in the dimensions of this dataset {tuple(self.dims)}"
)
if v in self.dims or v in self:
raise ValueError(
Expand Down Expand Up @@ -4366,7 +4361,7 @@ def swap_dims(
if k not in self.dims:
raise ValueError(
f"cannot swap from dimension {k!r} because it is "
"not an existing dimension"
f"not one of the dimensions of this dataset {tuple(self.dims)}"
)
if v in self.variables and self.variables[v].dims != (k,):
raise ValueError(
Expand Down Expand Up @@ -5448,10 +5443,10 @@ def unstack(
else:
dims = list(dim)

missing_dims = [d for d in dims if d not in self.dims]
missing_dims = set(dims) - set(self.dims)
if missing_dims:
raise ValueError(
f"Dataset does not contain the dimensions: {missing_dims}"
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
)

# each specified dimension must have exactly one multi-index
Expand Down Expand Up @@ -5836,7 +5831,10 @@ def drop_indexes(
if errors == "raise":
invalid_coords = coord_names - self._coord_names
if invalid_coords:
raise ValueError(f"those coordinates don't exist: {invalid_coords}")
raise ValueError(
f"The coordinates {tuple(invalid_coords)} are not found in the "
f"dataset coordinates {tuple(self.coords.keys())}"
)

unindexed_coords = set(coord_names) - set(self._indexes)
if unindexed_coords:
Expand Down Expand Up @@ -6084,7 +6082,7 @@ def drop_dims(
missing_dims = drop_dims - set(self.dims)
if missing_dims:
raise ValueError(
f"Dataset does not contain the dimensions: {missing_dims}"
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
)

drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims}
Expand Down Expand Up @@ -6244,7 +6242,9 @@ def dropna(
# depending on the order of the supplied axes.

if dim not in self.dims:
raise ValueError(f"{dim} must be a single dataset dimension")
raise ValueError(
f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}"
)

if subset is None:
subset = iter(self.data_vars)
Expand Down Expand Up @@ -6725,10 +6725,10 @@ def reduce(
else:
dims = set(dim)

missing_dimensions = [d for d in dims if d not in self.dims]
missing_dimensions = tuple(d for d in dims if d not in self.dims)
if missing_dimensions:
raise ValueError(
f"Dataset does not contain the dimensions: {missing_dimensions}"
f"Dimensions {missing_dimensions} not found in data dimensions {tuple(self.dims)}"
)

if keep_attrs is None:
Expand Down Expand Up @@ -7710,9 +7710,11 @@ def shift(
foo (x) object nan nan 'a' 'b' 'c'
"""
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift")
invalid = [k for k in shifts if k not in self.dims]
invalid = tuple(k for k in shifts if k not in self.dims)
if invalid:
raise ValueError(f"dimensions {invalid!r} do not exist")
raise ValueError(
f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}"
)

variables = {}
for name, var in self.variables.items():
Expand Down Expand Up @@ -7789,7 +7791,9 @@ def roll(
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll")
invalid = [k for k in shifts if k not in self.dims]
if invalid:
raise ValueError(f"dimensions {invalid!r} do not exist")
raise ValueError(
f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}"
)

unrolled_vars: tuple[Hashable, ...]

Expand Down Expand Up @@ -8038,10 +8042,11 @@ def quantile(
else:
dims = set(dim)

_assert_empty(
tuple(d for d in dims if d not in self.dims),
"Dataset does not contain the dimensions: %s",
)
invalid_dims = set(dims) - set(self.dims)
if invalid_dims:
raise ValueError(
f"Dimensions {tuple(invalid_dims)} not found in data dimensions {tuple(self.dims)}"
)

q = np.asarray(q, dtype=np.float64)

Expand Down Expand Up @@ -8117,7 +8122,9 @@ def rank(
)

if dim not in self.dims:
raise ValueError(f"Dataset does not contain the dimension: {dim}")
raise ValueError(
f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}"
)

variables = {}
for name, var in self.variables.items():
Expand Down Expand Up @@ -8167,7 +8174,10 @@ def differentiate(
from xarray.core.variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError(f"Coordinate {coord} does not exist.")
variables_and_dims = tuple(set(self.variables.keys()).union(self.dims))
raise ValueError(
f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}."
)

coord_var = self[coord].variable
if coord_var.ndim != 1:
Expand Down Expand Up @@ -8269,7 +8279,10 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False):
from xarray.core.variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError(f"Coordinate {coord} does not exist.")
variables_and_dims = tuple(set(self.variables.keys()).union(self.dims))
raise ValueError(
f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}."
)

coord_var = self[coord].variable
if coord_var.ndim != 1:
Expand Down Expand Up @@ -9771,7 +9784,9 @@ def drop_duplicates(

missing_dims = set(dims) - set(self.dims)
if missing_dims:
raise ValueError(f"'{missing_dims}' not found in dimensions")
raise ValueError(
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
)

indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims}
return self.isel(indexes)
Expand Down
6 changes: 3 additions & 3 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,12 +1203,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult:
coord_name, label = next(iter(labels.items()))

if is_dict_like(label):
invalid_levels = [
invalid_levels = tuple(
name for name in label if name not in self.index.names
]
)
if invalid_levels:
raise ValueError(
f"invalid multi-index level names {invalid_levels}"
f"multi-index level names {invalid_levels} not found in indexes {tuple(self.index.names)}"
)
return self.sel(label)

Expand Down
5 changes: 4 additions & 1 deletion xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,10 @@ def group_indexers_by_index(
elif key in obj.coords:
raise KeyError(f"no index found for coordinate {key!r}")
elif key not in obj.dims:
raise KeyError(f"{key!r} is not a valid dimension or coordinate")
raise KeyError(
f"{key!r} is not a valid dimension or coordinate for "
f"{obj.__class__.__name__} with dimensions {obj.dims!r}"
)
elif len(options):
raise ValueError(
f"cannot supply selection options {options!r} for dimension {key!r}"
Expand Down
18 changes: 13 additions & 5 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ def __init__(
self.center = self._mapping_to_list(center, default=False)
self.obj: T_Xarray = obj

missing_dims = tuple(dim for dim in self.dim if dim not in self.obj.dims)
if missing_dims:
# NOTE: we raise KeyError here but ValueError in Coarsen.
raise KeyError(
f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} "
f"dimensions {tuple(self.obj.dims)}"
)

# attributes
if min_periods is not None and min_periods <= 0:
raise ValueError("min_periods must be greater than zero or None")
Expand Down Expand Up @@ -624,8 +632,7 @@ def __init__(
xarray.DataArray.groupby
"""
super().__init__(obj, windows, min_periods, center)
if any(d not in self.obj.dims for d in self.dim):
raise KeyError(self.dim)

# Keep each Rolling object as a dictionary
self.rollings = {}
for key, da in self.obj.data_vars.items():
Expand Down Expand Up @@ -839,10 +846,11 @@ def __init__(
self.side = side
self.boundary = boundary

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
missing_dims = tuple(dim for dim in windows.keys() if dim not in self.obj.dims)
if missing_dims:
raise ValueError(
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} "
f"dimensions {tuple(self.obj.dims)}"
)
if not utils.is_dict_like(coord_func):
coord_func = {d: coord_func for d in self.obj.dims} # type: ignore[misc]
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2117,7 +2117,7 @@ def concat(
for var in variables:
if var.dims != first_var_dims:
raise ValueError(
f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var_dims)}"
f"Variable has dimensions {tuple(var.dims)} but first Variable has dimensions {tuple(first_var_dims)}"
)

return cls(dims, data, attrs, encoding, fastpath=True)
Expand Down
5 changes: 3 additions & 2 deletions xarray/core/weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,11 @@ def _check_dim(self, dim: Dims):
dims = [dim] if dim else []
else:
dims = list(dim)
missing_dims = set(dims) - set(self.obj.dims) - set(self.weights.dims)
all_dims = set(self.obj.dims).union(set(self.weights.dims))
missing_dims = set(dims) - all_dims
if missing_dims:
raise ValueError(
f"{self.__class__.__name__} does not contain the dimensions: {missing_dims}"
f"Dimensions {tuple(missing_dims)} not found in {self.__class__.__name__} dimensions {tuple(all_dims)}"
)

@staticmethod
Expand Down
5 changes: 4 additions & 1 deletion xarray/tests/test_coarsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@


def test_coarsen_absent_dims_error(ds: Dataset) -> None:
with pytest.raises(ValueError, match=r"not found in Dataset."):
with pytest.raises(
ValueError,
match=r"Window dimensions \('foo',\) not found in Dataset dimensions",
):
ds.coarsen(foo=2)


Expand Down
5 changes: 4 additions & 1 deletion xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,9 +614,12 @@ def test_concat_errors(self):
with pytest.raises(ValueError, match=r"must supply at least one"):
concat([], "dim1")

with pytest.raises(ValueError, match=r"are not coordinates"):
with pytest.raises(ValueError, match=r"are not found in the coordinates"):
concat([data, data], "new_dim", coords=["not_found"])

with pytest.raises(ValueError, match=r"are not found in the data variables"):
concat([data, data], "new_dim", data_vars=["not_found"])

with pytest.raises(ValueError, match=r"global attributes not"):
# call deepcopy seperately to get unique attrs
data0 = deepcopy(split_data[0])
Expand Down
5 changes: 5 additions & 0 deletions xarray/tests/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ def test_delitem(self) -> None:
del coords["x"]
assert "x" not in coords

with pytest.raises(
KeyError, match="'nonexistent' is not in coordinate variables"
):
del coords["nonexistent"]

def test_update(self) -> None:
coords = Coordinates(coords={"x": [0, 1, 2]})

Expand Down
Loading

0 comments on commit 0afbd45

Please sign in to comment.