Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consistently report all dimensions in error messages if invalid dimensions are given #8079

Merged
merged 13 commits into from
Sep 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ Documentation
Internal Changes
~~~~~~~~~~~~~~~~

- Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`).
By `András Gunyhó <https://github.com/mgunyho>`_.

.. _whats-new.2023.08.0:

Expand Down
8 changes: 6 additions & 2 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2046,9 +2046,13 @@ def _calc_idxminmax(
raise ValueError("Must supply 'dim' argument for multidimensional arrays")

if dim not in array.dims:
raise KeyError(f'Dimension "{dim}" not in dimension')
raise KeyError(
f"Dimension {dim!r} not found in array dimensions {array.dims!r}"
dcherian marked this conversation as resolved.
Show resolved Hide resolved
)
if dim not in array.coords:
raise KeyError(f'Dimension "{dim}" does not have coordinates')
raise KeyError(
f"Dimension {dim!r} is not one of the coordinates {tuple(array.coords.keys())}"
)

# These are dtypes with NaN values argmin and argmax can handle
na_dtypes = "cfO"
Expand Down
13 changes: 8 additions & 5 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,17 +391,20 @@ def process_subset_opt(opt, subset):
else:
raise ValueError(f"unexpected value for {subset}: {opt}")
else:
invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)]
valid_vars = tuple(getattr(datasets[0], subset))
invalid_vars = [k for k in opt if k not in valid_vars]
if invalid_vars:
if subset == "coords":
raise ValueError(
"some variables in coords are not coordinates on "
f"the first dataset: {invalid_vars}"
f"the variables {invalid_vars} in coords are not "
f"found in the coordinates of the first dataset {valid_vars}"
)
else:
# note: data_vars are not listed in the error message here,
# because there may be lots of them
raise ValueError(
"some variables in data_vars are not data variables "
f"on the first dataset: {invalid_vars}"
f"the variables {invalid_vars} in data_vars are not "
f"found in the data variables of the first dataset"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f"found in the data variables of the first dataset"
f"found in the data variables of the first dataset {valid_vars}"

Copy link
Contributor Author

@mgunyho mgunyho Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here I explicitly left out listing the data variables, because it's more likely to have many of them like in #5546 (see also the comment in the code right above this)

)
concat_over.update(opt)

Expand Down
8 changes: 6 additions & 2 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,9 @@ def __delitem__(self, key: Hashable) -> None:
if key in self:
del self._data[key]
else:
raise KeyError(f"{key!r} is not a coordinate variable.")
raise KeyError(
f"{key!r} is not in coordinate variables {tuple(self.keys())}"
)

def _ipython_key_completions_(self):
"""Provide method for the key-autocompletions in IPython."""
Expand Down Expand Up @@ -855,7 +857,9 @@ def to_dataset(self) -> Dataset:

def __delitem__(self, key: Hashable) -> None:
if key not in self:
raise KeyError(f"{key!r} is not a coordinate variable.")
raise KeyError(
f"{key!r} is not in coordinate variables {tuple(self.keys())}"
)
assert_no_index_corrupted(self._data.xindexes, {key})

del self._data._coords[key]
Expand Down
69 changes: 42 additions & 27 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,6 @@ def _get_virtual_variable(
return ref_name, var_name, virtual_var


def _assert_empty(args: tuple, msg: str = "%s") -> None:
if args:
raise ValueError(msg % args)


def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint):
"""
Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
Expand Down Expand Up @@ -2640,7 +2635,7 @@ def chunk(
bad_dims = chunks.keys() - self.dims.keys()
if bad_dims:
raise ValueError(
f"some chunks keys are not dimensions on this object: {bad_dims}"
f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}"
)

chunkmanager = guess_chunkmanager(chunked_array_type)
Expand Down Expand Up @@ -4243,8 +4238,8 @@ def rename_dims(
for k, v in dims_dict.items():
if k not in self.dims:
raise ValueError(
f"cannot rename {k!r} because it is not a "
"dimension in this dataset"
f"cannot rename {k!r} because it is not found "
f"in the dimensions of this dataset {tuple(self.dims)}"
)
if v in self.dims or v in self:
raise ValueError(
Expand Down Expand Up @@ -4366,7 +4361,7 @@ def swap_dims(
if k not in self.dims:
raise ValueError(
f"cannot swap from dimension {k!r} because it is "
"not an existing dimension"
f"not one of the dimensions of this dataset {tuple(self.dims)}"
)
if v in self.variables and self.variables[v].dims != (k,):
raise ValueError(
Expand Down Expand Up @@ -5448,10 +5443,10 @@ def unstack(
else:
dims = list(dim)

missing_dims = [d for d in dims if d not in self.dims]
missing_dims = set(dims) - set(self.dims)
if missing_dims:
raise ValueError(
f"Dataset does not contain the dimensions: {missing_dims}"
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
)

# each specified dimension must have exactly one multi-index
Expand Down Expand Up @@ -5836,7 +5831,10 @@ def drop_indexes(
if errors == "raise":
invalid_coords = coord_names - self._coord_names
if invalid_coords:
raise ValueError(f"those coordinates don't exist: {invalid_coords}")
raise ValueError(
f"The coordinates {tuple(invalid_coords)} are not found in the "
f"dataset coordinates {tuple(self.coords.keys())}"
)

unindexed_coords = set(coord_names) - set(self._indexes)
if unindexed_coords:
Expand Down Expand Up @@ -6084,7 +6082,7 @@ def drop_dims(
missing_dims = drop_dims - set(self.dims)
if missing_dims:
raise ValueError(
f"Dataset does not contain the dimensions: {missing_dims}"
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
)

drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims}
Expand Down Expand Up @@ -6244,7 +6242,9 @@ def dropna(
# depending on the order of the supplied axes.

if dim not in self.dims:
raise ValueError(f"{dim} must be a single dataset dimension")
raise ValueError(
f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}"
)

if subset is None:
subset = iter(self.data_vars)
Expand Down Expand Up @@ -6725,10 +6725,10 @@ def reduce(
else:
dims = set(dim)

missing_dimensions = [d for d in dims if d not in self.dims]
missing_dimensions = tuple(d for d in dims if d not in self.dims)
if missing_dimensions:
raise ValueError(
f"Dataset does not contain the dimensions: {missing_dimensions}"
f"Dimensions {missing_dimensions} not found in data dimensions {tuple(self.dims)}"
)

if keep_attrs is None:
Expand Down Expand Up @@ -7710,9 +7710,11 @@ def shift(
foo (x) object nan nan 'a' 'b' 'c'
"""
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift")
invalid = [k for k in shifts if k not in self.dims]
invalid = tuple(k for k in shifts if k not in self.dims)
if invalid:
raise ValueError(f"dimensions {invalid!r} do not exist")
raise ValueError(
f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}"
)

variables = {}
for name, var in self.variables.items():
Expand Down Expand Up @@ -7789,7 +7791,9 @@ def roll(
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll")
invalid = [k for k in shifts if k not in self.dims]
if invalid:
raise ValueError(f"dimensions {invalid!r} do not exist")
raise ValueError(
f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}"
)

unrolled_vars: tuple[Hashable, ...]

Expand Down Expand Up @@ -8038,10 +8042,11 @@ def quantile(
else:
dims = set(dim)

_assert_empty(
tuple(d for d in dims if d not in self.dims),
"Dataset does not contain the dimensions: %s",
)
invalid_dims = set(dims) - set(self.dims)
if invalid_dims:
raise ValueError(
f"Dimensions {tuple(invalid_dims)} not found in data dimensions {tuple(self.dims)}"
)

q = np.asarray(q, dtype=np.float64)

Expand Down Expand Up @@ -8117,7 +8122,9 @@ def rank(
)

if dim not in self.dims:
raise ValueError(f"Dataset does not contain the dimension: {dim}")
raise ValueError(
f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}"
)

variables = {}
for name, var in self.variables.items():
Expand Down Expand Up @@ -8167,7 +8174,10 @@ def differentiate(
from xarray.core.variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError(f"Coordinate {coord} does not exist.")
variables_and_dims = tuple(set(self.variables.keys()).union(self.dims))
raise ValueError(
f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}."
)

coord_var = self[coord].variable
if coord_var.ndim != 1:
Expand Down Expand Up @@ -8269,7 +8279,10 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False):
from xarray.core.variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError(f"Coordinate {coord} does not exist.")
variables_and_dims = tuple(set(self.variables.keys()).union(self.dims))
raise ValueError(
f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}."
)

coord_var = self[coord].variable
if coord_var.ndim != 1:
Expand Down Expand Up @@ -9771,7 +9784,9 @@ def drop_duplicates(

missing_dims = set(dims) - set(self.dims)
if missing_dims:
raise ValueError(f"'{missing_dims}' not found in dimensions")
raise ValueError(
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
)

indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims}
return self.isel(indexes)
Expand Down
6 changes: 3 additions & 3 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,12 +1203,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult:
coord_name, label = next(iter(labels.items()))

if is_dict_like(label):
invalid_levels = [
invalid_levels = tuple(
name for name in label if name not in self.index.names
]
)
if invalid_levels:
raise ValueError(
f"invalid multi-index level names {invalid_levels}"
f"multi-index level names {invalid_levels} not found in indexes {tuple(self.index.names)}"
)
return self.sel(label)

Expand Down
5 changes: 4 additions & 1 deletion xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,10 @@ def group_indexers_by_index(
elif key in obj.coords:
raise KeyError(f"no index found for coordinate {key!r}")
elif key not in obj.dims:
raise KeyError(f"{key!r} is not a valid dimension or coordinate")
raise KeyError(
f"{key!r} is not a valid dimension or coordinate for "
f"{obj.__class__.__name__} with dimensions {obj.dims!r}"
)
elif len(options):
raise ValueError(
f"cannot supply selection options {options!r} for dimension {key!r}"
Expand Down
18 changes: 13 additions & 5 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ def __init__(
self.center = self._mapping_to_list(center, default=False)
self.obj: T_Xarray = obj

missing_dims = tuple(dim for dim in self.dim if dim not in self.obj.dims)
if missing_dims:
# NOTE: we raise KeyError here but ValueError in Coarsen.
raise KeyError(
f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} "
f"dimensions {tuple(self.obj.dims)}"
)

# attributes
if min_periods is not None and min_periods <= 0:
raise ValueError("min_periods must be greater than zero or None")
Expand Down Expand Up @@ -624,8 +632,7 @@ def __init__(
xarray.DataArray.groupby
"""
super().__init__(obj, windows, min_periods, center)
if any(d not in self.obj.dims for d in self.dim):
raise KeyError(self.dim)

# Keep each Rolling object as a dictionary
self.rollings = {}
for key, da in self.obj.data_vars.items():
Expand Down Expand Up @@ -839,10 +846,11 @@ def __init__(
self.side = side
self.boundary = boundary

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
missing_dims = tuple(dim for dim in windows.keys() if dim not in self.obj.dims)
if missing_dims:
raise ValueError(
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} "
f"dimensions {tuple(self.obj.dims)}"
)
if not utils.is_dict_like(coord_func):
coord_func = {d: coord_func for d in self.obj.dims} # type: ignore[misc]
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2117,7 +2117,7 @@ def concat(
for var in variables:
if var.dims != first_var_dims:
raise ValueError(
f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var_dims)}"
f"Variable has dimensions {tuple(var.dims)} but first Variable has dimensions {tuple(first_var_dims)}"
)

return cls(dims, data, attrs, encoding, fastpath=True)
Expand Down
5 changes: 3 additions & 2 deletions xarray/core/weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,11 @@ def _check_dim(self, dim: Dims):
dims = [dim] if dim else []
else:
dims = list(dim)
missing_dims = set(dims) - set(self.obj.dims) - set(self.weights.dims)
all_dims = set(self.obj.dims).union(set(self.weights.dims))
missing_dims = set(dims) - all_dims
if missing_dims:
raise ValueError(
f"{self.__class__.__name__} does not contain the dimensions: {missing_dims}"
f"Dimensions {tuple(missing_dims)} not found in {self.__class__.__name__} dimensions {tuple(all_dims)}"
)

@staticmethod
Expand Down
5 changes: 4 additions & 1 deletion xarray/tests/test_coarsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@


def test_coarsen_absent_dims_error(ds: Dataset) -> None:
with pytest.raises(ValueError, match=r"not found in Dataset."):
with pytest.raises(
ValueError,
match=r"Window dimensions \('foo',\) not found in Dataset dimensions",
):
ds.coarsen(foo=2)


Expand Down
5 changes: 4 additions & 1 deletion xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,9 +614,12 @@ def test_concat_errors(self):
with pytest.raises(ValueError, match=r"must supply at least one"):
concat([], "dim1")

with pytest.raises(ValueError, match=r"are not coordinates"):
with pytest.raises(ValueError, match=r"are not found in the coordinates"):
concat([data, data], "new_dim", coords=["not_found"])

with pytest.raises(ValueError, match=r"are not found in the data variables"):
concat([data, data], "new_dim", data_vars=["not_found"])

with pytest.raises(ValueError, match=r"global attributes not"):
# call deepcopy seperately to get unique attrs
data0 = deepcopy(split_data[0])
Expand Down
5 changes: 5 additions & 0 deletions xarray/tests/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ def test_delitem(self) -> None:
del coords["x"]
assert "x" not in coords

with pytest.raises(
KeyError, match="'nonexistent' is not in coordinate variables"
):
del coords["nonexistent"]

def test_update(self) -> None:
coords = Coordinates(coords={"x": [0, 1, 2]})

Expand Down
Loading
Loading