From 3719ba7ca6d66dbb1c7a105907beb13e8d826e37 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 19 Apr 2024 11:11:25 -0400 Subject: [PATCH 01/15] test expand_dims doesn't create Index --- xarray/tests/test_dataset.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a948fafc815..54a7b0a6f87 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3431,6 +3431,23 @@ def test_expand_dims_kwargs_python36plus(self) -> None: ) assert_identical(other_way_expected, other_way) + def test_expand_dims_creates_indexvariable(self): + # data variables should not gain an index ever + ds = Dataset({"a": 0}) + for flag in [True, False]: + expanded = ds.expand_dims("x", create_1d_index=flag) + expected = Dataset({"a": ("x", [0])}) + assert_identical(expanded, expected) + assert expanded.indexes == {} + + # coordinate variables should gain an index only if create_1d_index is True (the default) + ds = Dataset(coords={"x": 0}) + expanded = ds.expand_dims("x") + expected = Dataset({"x": ("x", [0])}) + assert_identical(expanded, expected) + expanded_no_index = ds.expand_dims("x", create_1d_index=False) + assert expanded_no_index.indexes == {} + @requires_pandas_version_two def test_expand_dims_non_nanosecond_conversion(self) -> None: # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000 From 018e74bb1c94141cd86502bfa2ac964a44f28860 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 19 Apr 2024 11:40:06 -0400 Subject: [PATCH 02/15] add option to not create 1D index in expand_dims --- xarray/core/dataset.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 96f3be00995..233b7833785 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4497,6 +4497,7 @@ def expand_dims( self, dim: None | Hashable | Sequence[Hashable] | Mapping[Any, 
Any] = None, axis: None | int | Sequence[int] = None, + create_1d_index: bool = True, **dim_kwargs: Any, ) -> Self: """Return a new object with an additional axis (or axes) inserted at @@ -4506,6 +4507,8 @@ def expand_dims( If dim is already a scalar coordinate, it will be promoted to a 1D coordinate consisting of a single value. + The + Parameters ---------- dim : hashable, sequence of hashable, mapping, or None @@ -4521,6 +4524,8 @@ def expand_dims( multiple axes are inserted. In this case, dim arguments should be same length list. If axis=None is passed, all the axes will be inserted to the start of the result array. + create_1d_index : bool, default is True + Whether to create new PandasIndex objects for new 1D coordinate variables. **dim_kwargs : int or sequence or ndarray The keywords are arbitrary dimensions being inserted and the values are either the lengths of the new dims (if int is given), or their @@ -4640,6 +4645,8 @@ def expand_dims( # save the coordinates to the variables dict, and set the # value within the dim dict to the length of the iterable # for later use. + + # TODO should we have an option to not create a variable here? index = PandasIndex(v, k) indexes[k] = index variables.update(index.create_variables()) @@ -4678,11 +4685,16 @@ def expand_dims( variables[k] = v.set_dims(dict(all_dims)) else: if k not in variables: - # If dims includes a label of a non-dimension coordinate, - # it will be promoted to a 1D coordinate with a single value. - index, index_vars = create_default_index_implicit(v.set_dims(k)) - indexes[k] = index - variables.update(index_vars) + if create_1d_index: + # If dims includes a label of a non-dimension coordinate, + # it will be promoted to a 1D coordinate with a single value. 
+ index, index_vars = create_default_index_implicit(v.set_dims(k)) + indexes[k] = index + variables.update(index_vars) + else: + # create 1D variable without creating a new index + new_1d_var = v.set_dims(k) + variables.update({k: new_1d_var}) return self._replace_with_new_dims( variables, coord_names=coord_names, indexes=indexes From f680505d3dd630bc8fa0a200e5d9efde7cf8ada9 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 19 Apr 2024 22:47:07 -0400 Subject: [PATCH 03/15] refactor tests to consider data variables and coordinate variables separately --- xarray/tests/test_dataset.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 54a7b0a6f87..74c4af1790d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3431,21 +3431,34 @@ def test_expand_dims_kwargs_python36plus(self) -> None: ) assert_identical(other_way_expected, other_way) - def test_expand_dims_creates_indexvariable(self): + @pytest.mark.parametrize("create_1d_index_flag", [True, False]) + def test_expand_dims_create_index_data_variable(self, create_1d_index_flag): # data variables should not gain an index ever - ds = Dataset({"a": 0}) - for flag in [True, False]: - expanded = ds.expand_dims("x", create_1d_index=flag) - expected = Dataset({"a": ("x", [0])}) - assert_identical(expanded, expected) - assert expanded.indexes == {} + ds = Dataset({"x": 0}) + expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag) + + # TODO I can't just create the expected dataset directly using constructor because of GH issue 8959 + # expected = Dataset(data_vars={"x": ("x", [0])}) + expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x") + + # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks + # assert_identical(expanded, expected) + assert expected.data_vars == {"x": Variable(data=[0], dims=["x"])} + 
assert expanded.indexes == {} + def test_expand_dims_create_index_coordinate_variable(self): # coordinate variables should gain an index only if create_1d_index is True (the default) ds = Dataset(coords={"x": 0}) expanded = ds.expand_dims("x") expected = Dataset({"x": ("x", [0])}) assert_identical(expanded, expected) + expanded_no_index = ds.expand_dims("x", create_1d_index=False) + expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x") + + # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks + # assert_identical(expanded, expected) + assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])} assert expanded_no_index.indexes == {} @requires_pandas_version_two From f10509aa983a4baae66163030b26827ef6c11344 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 19 Apr 2024 23:22:11 -0400 Subject: [PATCH 04/15] fix bug causing new test to fail --- xarray/core/dataset.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 233b7833785..9752afbd749 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4507,8 +4507,9 @@ def expand_dims( If dim is already a scalar coordinate, it will be promoted to a 1D coordinate consisting of a single value. - The - + The automatic creation of indexes to back new 1D coordinate variables + controlled by the create_1d_index kwarg. + Parameters ---------- dim : hashable, sequence of hashable, mapping, or None @@ -4525,7 +4526,7 @@ def expand_dims( same length list. If axis=None is passed, all the axes will be inserted to the start of the result array. create_1d_index : bool, default is True - Whether to create new PandasIndex objects for new 1D coordinate variables. + Whether to create new PandasIndex objects for any new 1D coordinate variables. 
**dim_kwargs : int or sequence or ndarray The keywords are arbitrary dimensions being inserted and the values are either the lengths of the new dims (if int is given), or their @@ -4685,7 +4686,7 @@ def expand_dims( variables[k] = v.set_dims(dict(all_dims)) else: if k not in variables: - if create_1d_index: + if k in coord_names and create_1d_index: # If dims includes a label of a non-dimension coordinate, # it will be promoted to a 1D coordinate with a single value. index, index_vars = create_default_index_implicit(v.set_dims(k)) From 8152c0a40c4386eb01cd1974769527ae5c85a570 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 19 Apr 2024 23:51:28 -0400 Subject: [PATCH 05/15] test index auto-creation when iterable passed as new coordinate values --- xarray/tests/test_dataset.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 74c4af1790d..feee69bf49a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -11,6 +11,7 @@ from typing import Any, Literal import numpy as np +import numpy.testing as npt import pandas as pd import pytest from pandas.core.indexes.datetimes import DatetimeIndex @@ -3461,6 +3462,22 @@ def test_expand_dims_create_index_coordinate_variable(self): assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])} assert expanded_no_index.indexes == {} + def test_expand_dims_create_index_from_iterable(self): + ds = Dataset(coords={"x": 0}) + expanded = ds.expand_dims(x=[0, 1]) + expected = Dataset({"x": ("x", [0, 1])}) + assert_identical(expanded, expected) + + expanded_no_index = ds.expand_dims(x=[0, 1], create_1d_index=False) + expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x") + + # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks + # assert_identical(expanded, expected) + assert list(expanded_no_index.coords) == ["x"] + assert 
isinstance(expanded_no_index.coords["x"].variable, Variable) + npt.assert_array_equal(expanded_no_index.coords["x"].data, np.array([0, 1])) + assert expanded_no_index.indexes == {} + @requires_pandas_version_two def test_expand_dims_non_nanosecond_conversion(self) -> None: # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000 From aa813cff0d6011af81cb4a15c87e9857f35c3d26 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 20 Apr 2024 00:06:27 -0400 Subject: [PATCH 06/15] make test for iterable pass --- xarray/core/dataset.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9752afbd749..25cf8263bf5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4647,10 +4647,13 @@ def expand_dims( # value within the dim dict to the length of the iterable # for later use. - # TODO should we have an option to not create a variable here? - index = PandasIndex(v, k) - indexes[k] = index - variables.update(index.create_variables()) + if create_1d_index: + index = PandasIndex(v, k) + indexes[k] = index + name_and_new_1d_var = index.create_variables() + else: + name_and_new_1d_var = {k: Variable(data=v, dims=k)} + variables.update(name_and_new_1d_var) coord_names.add(k) dim[k] = variables[k].size elif isinstance(v, int): From e78de7d5e93649114d9aaa916bf0f4185882be9c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 20 Apr 2024 00:36:37 -0400 Subject: [PATCH 07/15] added kwarg to dataarray --- xarray/core/dataarray.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 509962ff80d..818275dedd3 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2557,6 +2557,7 @@ def expand_dims( self, dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, axis: None | int | Sequence[int] = None, + create_1d_index: bool = True, **dim_kwargs: Any, 
) -> Self: """Return a new object with an additional axis (or axes) inserted at @@ -2566,6 +2567,9 @@ def expand_dims( If dim is already a scalar coordinate, it will be promoted to a 1D coordinate consisting of a single value. + The automatic creation of indexes to back new 1D coordinate variables + controlled by the create_1d_index kwarg. + Parameters ---------- dim : Hashable, sequence of Hashable, dict, or None, optional @@ -2581,6 +2585,8 @@ def expand_dims( multiple axes are inserted. In this case, dim arguments should be same length list. If axis=None is passed, all the axes will be inserted to the start of the result array. + create_1d_index : bool, default is True + Whether to create new PandasIndex objects for any new 1D coordinate variables. **dim_kwargs : int or sequence or ndarray The keywords are arbitrary dimensions being inserted and the values are either the lengths of the new dims (if int is given), or their @@ -2644,7 +2650,9 @@ def expand_dims( dim = {dim: 1} dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") - ds = self._to_temp_dataset().expand_dims(dim, axis) + ds = self._to_temp_dataset().expand_dims( + dim, axis, create_1d_index=create_1d_index + ) return self._from_temp_dataset(ds) def set_index( From b1329cc08c2b08cf032c6a6ddb4da8ff99bbbee7 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 20 Apr 2024 01:22:51 -0400 Subject: [PATCH 08/15] whatsnew --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2332f7f236b..ffa6f562e41 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,9 @@ New Features for example, will retain the object. However, one cannot do operations that are not possible on the `ExtensionArray` then, such as broadcasting. By `Ilan Gold `_. +- Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg + `create_1d_index=False`. (:pull:`8960`) + By `Tom Nicholas `_. 
Breaking changes ~~~~~~~~~~~~~~~~ From a9f7e0c7c177f16371819963f16388537fead51d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Apr 2024 05:23:22 +0000 Subject: [PATCH 09/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ffa6f562e41..7ecbc8fc3e6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,7 @@ New Features then, such as broadcasting. By `Ilan Gold `_. - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg - `create_1d_index=False`. (:pull:`8960`) + `create_1d_index=False`. (:pull:`8960`) By `Tom Nicholas `_. Breaking changes From 62e750f46c55209b904cc14ece174fa58cce546a Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 26 Apr 2024 11:31:59 -0400 Subject: [PATCH 10/15] update tests to use private versions of assertions --- xarray/tests/test_dataset.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index feee69bf49a..4b1e2141750 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -11,7 +11,6 @@ from typing import Any, Literal import numpy as np -import numpy.testing as npt import pandas as pd import pytest from pandas.core.indexes.datetimes import DatetimeIndex @@ -3438,13 +3437,10 @@ def test_expand_dims_create_index_data_variable(self, create_1d_index_flag): ds = Dataset({"x": 0}) expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag) - # TODO I can't just create the expected dataset directly using constructor because of GH issue 8959 - # expected = Dataset(data_vars={"x": ("x", [0])}) + # TODO Can't just create the expected dataset directly using constructor 
because of GH issue 8959 expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x") - # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks - # assert_identical(expanded, expected) - assert expected.data_vars == {"x": Variable(data=[0], dims=["x"])} + assert_identical(expanded, expected, check_default_indexes=False) assert expanded.indexes == {} def test_expand_dims_create_index_coordinate_variable(self): @@ -3455,11 +3451,11 @@ def test_expand_dims_create_index_coordinate_variable(self): assert_identical(expanded, expected) expanded_no_index = ds.expand_dims("x", create_1d_index=False) + + # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x") - # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks - # assert_identical(expanded, expected) - assert expanded_no_index.coords == {"x": Variable(data=[0], dims=["x"])} + assert_identical(expanded_no_index, expected, check_default_indexes=False) assert expanded_no_index.indexes == {} def test_expand_dims_create_index_from_iterable(self): @@ -3469,13 +3465,11 @@ def test_expand_dims_create_index_from_iterable(self): assert_identical(expanded, expected) expanded_no_index = ds.expand_dims(x=[0, 1], create_1d_index=False) + + # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x") - # TODO also can't just assert equivalence because it will fail internal invariants default indexes checks - # assert_identical(expanded, expected) - assert list(expanded_no_index.coords) == ["x"] - assert isinstance(expanded_no_index.coords["x"].variable, Variable) - npt.assert_array_equal(expanded_no_index.coords["x"].data, np.array([0, 1])) + assert_identical(expanded, expected, check_default_indexes=False) assert 
expanded_no_index.indexes == {} @requires_pandas_version_two From f86c82f1c9efdf0b207f1fb537aea2cefe287c82 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 26 Apr 2024 11:34:37 -0400 Subject: [PATCH 11/15] create_1d_index->create_index --- xarray/core/dataarray.py | 10 ++++------ xarray/core/dataset.py | 10 +++++----- xarray/tests/test_dataset.py | 12 ++++++------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 818275dedd3..41c9af1bb10 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2557,7 +2557,7 @@ def expand_dims( self, dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, axis: None | int | Sequence[int] = None, - create_1d_index: bool = True, + create_index: bool = True, **dim_kwargs: Any, ) -> Self: """Return a new object with an additional axis (or axes) inserted at @@ -2568,7 +2568,7 @@ def expand_dims( coordinate consisting of a single value. The automatic creation of indexes to back new 1D coordinate variables - controlled by the create_1d_index kwarg. + is controlled by the create_index kwarg. Parameters ---------- @@ -2585,7 +2585,7 @@ def expand_dims( multiple axes are inserted. In this case, dim arguments should be same length list. If axis=None is passed, all the axes will be inserted to the start of the result array. - create_1d_index : bool, default is True + create_index : bool, default is True Whether to create new PandasIndex objects for any new 1D coordinate variables.
**dim_kwargs : int or sequence or ndarray The keywords are arbitrary dimensions being inserted and the values @@ -2650,9 +2650,7 @@ def expand_dims( dim = {dim: 1} dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") - ds = self._to_temp_dataset().expand_dims( - dim, axis, create_1d_index=create_1d_index - ) + ds = self._to_temp_dataset().expand_dims(dim, axis, create_index=create_index) return self._from_temp_dataset(ds) def set_index( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 25cf8263bf5..a2bc3505366 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4497,7 +4497,7 @@ def expand_dims( self, dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, axis: None | int | Sequence[int] = None, - create_1d_index: bool = True, + create_index: bool = True, **dim_kwargs: Any, ) -> Self: """Return a new object with an additional axis (or axes) inserted at @@ -4508,7 +4508,7 @@ def expand_dims( coordinate consisting of a single value. The automatic creation of indexes to back new 1D coordinate variables - controlled by the create_1d_index kwarg. + is controlled by the create_index kwarg. Parameters ---------- @@ -4525,7 +4525,7 @@ def expand_dims( multiple axes are inserted. In this case, dim arguments should be same length list. If axis=None is passed, all the axes will be inserted to the start of the result array. - create_1d_index : bool, default is True + create_index : bool, default is True Whether to create new PandasIndex objects for any new 1D coordinate variables. **dim_kwargs : int or sequence or ndarray The keywords are arbitrary dimensions being inserted and the values @@ -4647,7 +4647,7 @@ def expand_dims( # value within the dim dict to the length of the iterable # for later use.
- if create_1d_index: + if create_index: index = PandasIndex(v, k) indexes[k] = index name_and_new_1d_var = index.create_variables() @@ -4689,7 +4689,7 @@ def expand_dims( variables[k] = v.set_dims(dict(all_dims)) else: if k not in variables: - if k in coord_names and create_1d_index: + if k in coord_names and create_index: # If dims includes a label of a non-dimension coordinate, # it will be promoted to a 1D coordinate with a single value. index, index_vars = create_default_index_implicit(v.set_dims(k)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4b1e2141750..e4251ced7a4 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3431,11 +3431,11 @@ def test_expand_dims_kwargs_python36plus(self) -> None: ) assert_identical(other_way_expected, other_way) - @pytest.mark.parametrize("create_1d_index_flag", [True, False]) - def test_expand_dims_create_index_data_variable(self, create_1d_index_flag): + @pytest.mark.parametrize("create_index_flag", [True, False]) + def test_expand_dims_create_index_data_variable(self, create_index_flag): # data variables should not gain an index ever ds = Dataset({"x": 0}) - expanded = ds.expand_dims("x", create_1d_index=create_1d_index_flag) + expanded = ds.expand_dims("x", create_index=create_index_flag) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x") @@ -3444,13 +3444,13 @@ def test_expand_dims_create_index_data_variable(self, create_1d_index_flag): assert expanded.indexes == {} def test_expand_dims_create_index_coordinate_variable(self): - # coordinate variables should gain an index only if create_1d_index is True (the default) + # coordinate variables should gain an index only if create_index is True (the default) ds = Dataset(coords={"x": 0}) expanded = ds.expand_dims("x") expected = Dataset({"x": ("x", [0])}) assert_identical(expanded, expected) - 
expanded_no_index = ds.expand_dims("x", create_1d_index=False) + expanded_no_index = ds.expand_dims("x", create_index=False) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset(coords={"x": ("x", [0])}).drop_indexes("x") @@ -3464,7 +3464,7 @@ def test_expand_dims_create_index_from_iterable(self): expected = Dataset({"x": ("x", [0, 1])}) assert_identical(expanded, expected) - expanded_no_index = ds.expand_dims(x=[0, 1], create_1d_index=False) + expanded_no_index = ds.expand_dims(x=[0, 1], create_index=False) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x") From d5d90fdd27673537d47d7acae883571e0604ee91 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Fri, 26 Apr 2024 09:38:47 -0600 Subject: [PATCH 12/15] Update doc/whats-new.rst Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ce05e41243a..2231f0a7d4c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,7 @@ New Features then, such as broadcasting. By `Ilan Gold `_. - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg - `create_1d_index=False`. (:pull:`8960`) + `create_index=False`. (:pull:`8960`) By `Tom Nicholas `_. 
Breaking changes From 7e8f895c2746e2a75979b5675967b434aef09ee6 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 26 Apr 2024 23:31:08 -0400 Subject: [PATCH 13/15] warn if create_index=True but no index created because dimension variable was a data var not a coord --- xarray/core/dataset.py | 7 +++++++ xarray/tests/test_dataset.py | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a2bc3505366..3985d2cad1a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4696,6 +4696,13 @@ def expand_dims( indexes[k] = index variables.update(index_vars) else: + if create_index: + warnings.warn( + f"No index created for dimension {k} because variable {k} is not a coordinate. " + f"To create an index for {k}, please first call `.set_coords({k})` on this object.", + UserWarning, + ) + # create 1D variable without creating a new index new_1d_var = v.set_dims(k) variables.update({k: new_1d_var}) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e4251ced7a4..17ef63d826e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3435,7 +3435,12 @@ def test_expand_dims_kwargs_python36plus(self) -> None: def test_expand_dims_create_index_data_variable(self, create_index_flag): # data variables should not gain an index ever ds = Dataset({"x": 0}) - expanded = ds.expand_dims("x", create_index=create_index_flag) + + if create_index_flag: + with pytest.warns(UserWarning, match="No index created"): + expanded = ds.expand_dims("x", create_index=create_index_flag) + else: + expanded = ds.expand_dims("x", create_index=create_index_flag) # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959 expected = Dataset({"x": ("x", [0])}).drop_indexes("x").reset_coords("x") From ed85446d54ea8791f9224b71da833fd0f5d9c1eb Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 26 Apr 2024 23:32:53 -0400 Subject: [PATCH 
14/15] add string marks in warning message --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3985d2cad1a..4f9125a1ab0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4699,7 +4699,7 @@ def expand_dims( if create_index: warnings.warn( f"No index created for dimension {k} because variable {k} is not a coordinate. " - f"To create an index for {k}, please first call `.set_coords({k})` on this object.", + f"To create an index for {k}, please first call `.set_coords('{k}')` on this object.", UserWarning, ) From 6bde1587a0da18f856c1f1e12425475ce8f99d30 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 27 Apr 2024 12:27:15 -0400 Subject: [PATCH 15/15] fix bad merge --- xarray/tests/test_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 79e4d74f84a..40cf85484da 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3476,7 +3476,6 @@ def test_expand_dims_create_index_from_iterable(self): assert_identical(expanded, expected, check_default_indexes=False) assert expanded_no_index.indexes == {} - @requires_pandas_version_two def test_expand_dims_non_nanosecond_conversion(self) -> None: # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000 with pytest.warns(UserWarning, match="non-nanosecond precision"):