Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pandas=2.0 support #7724

Merged
merged 29 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7fcb376
unpin pandas in the package metadata
keewis Apr 5, 2023
414ca7d
unpin pandas in the ci environments [skip-rtd]
keewis Apr 5, 2023
e8b6aaa
create the input arrays in the parametrization
keewis Apr 6, 2023
db315e2
split `test_sel_float` into variants
keewis Apr 6, 2023
04a2073
skip the float16 variant if pandas>=2.0 is installed
keewis Apr 6, 2023
f00e776
[skip-rtd]
keewis Apr 6, 2023
d4e666c
Merge branch 'main' into unpin-pandas
keewis Apr 6, 2023
10922a1
add tests for `days_in_month` and its alias
keewis Apr 6, 2023
f02f069
make sure the name and dtype match the expected
keewis Apr 6, 2023
4c5c055
actually verify that the dtype stays the same
keewis Apr 6, 2023
ad956f8
apply the dtype for non-dask
keewis Apr 6, 2023
0a01aec
always use `int32` to follow `pandas=2.0`
keewis Apr 6, 2023
16fdcf8
back to `int64`
keewis Apr 6, 2023
e4db746
same for the test
keewis Apr 6, 2023
422f08b
final undo of `int64` → `int32`
keewis Apr 6, 2023
00f5b90
update the comment to make more sense
keewis Apr 6, 2023
7e5f719
simplify the conversion of the expected data
keewis Apr 6, 2023
6a522cb
change back to the old condition
keewis Apr 6, 2023
7803e66
cast float16 to float64 when creating indexes (but warn anyways)
keewis Apr 6, 2023
1792307
convert float16 to float64 when selecting using arrays
keewis Apr 6, 2023
230cb85
move the float16 variant to a separate test
keewis Apr 6, 2023
144ad7f
explicitly type the kwargs as a mapping of str → str
keewis Apr 7, 2023
54832cf
reword the warning message
keewis Apr 7, 2023
2f586aa
Merge branch 'main' into unpin-pandas
keewis Apr 11, 2023
68f6c00
restore the pin
keewis Apr 12, 2023
3e6f38a
Merge branch 'main' into unpin-pandas
keewis Apr 12, 2023
f165c6c
[skip-ci] [skip-rtd]
keewis Apr 12, 2023
47b3b99
rerun to make sure we don't introduce failures with `pandas<2`
keewis Apr 12, 2023
ec15fb4
changelog
keewis Apr 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ New Features
- Added ability to save ``DataArray`` objects directly to Zarr using :py:meth:`~xarray.DataArray.to_zarr`.
(:issue:`7692`, :pull:`7693`).
By `Joe Hamman <https://github.com/jhamman>`_.
- Support ``pandas>=2.0`` (:pull:`7724`).
By `Justus Magin <https://github.com/keewis>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def _get_date_field(values, name, dtype):
access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks
)
else:
return access_method(values, name)
return access_method(values, name).astype(dtype)
dcherian marked this conversation as resolved.
Show resolved Hide resolved


def _round_through_series_or_index(values, name, freq):
Expand Down
28 changes: 24 additions & 4 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@
PandasIndexingAdapter,
PandasMultiIndexingAdapter,
)
from xarray.core.utils import Frozen, get_valid_numpy_dtype, is_dict_like, is_scalar
from xarray.core.utils import (
Frozen,
emit_user_level_warning,
get_valid_numpy_dtype,
is_dict_like,
is_scalar,
)

if TYPE_CHECKING:
from xarray.core.types import ErrorOptions, T_Index
Expand Down Expand Up @@ -166,9 +172,21 @@ def safe_cast_to_index(array: Any) -> pd.Index:
elif isinstance(array, PandasIndexingAdapter):
index = array.array
else:
kwargs = {}
if hasattr(array, "dtype") and array.dtype.kind == "O":
kwargs["dtype"] = object
kwargs: dict[str, str] = {}
if hasattr(array, "dtype"):
if array.dtype.kind == "O":
kwargs["dtype"] = "object"
elif array.dtype == "float16":
emit_user_level_warning(
(
"`pandas.Index` does not support the `float16` dtype."
" Casting to `float64` for you, but in the future please"
" manually cast to either `float32` and `float64`."
),
category=DeprecationWarning,
)
kwargs["dtype"] = "float64"

index = pd.Index(np.asarray(array), **kwargs)

return _maybe_cast_to_cftimeindex(index)
Expand Down Expand Up @@ -259,6 +277,8 @@ def get_indexer_nd(index, labels, method=None, tolerance=None):
labels
"""
flat_labels = np.ravel(labels)
if flat_labels.dtype == "float16":
flat_labels = flat_labels.astype("float64")
flat_indexer = index.get_indexer(flat_labels, method=method, tolerance=tolerance)
indexer = flat_indexer.reshape(labels.shape)
return indexer
Expand Down
18 changes: 16 additions & 2 deletions xarray/tests/test_accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def setup(self):
"quarter",
"date",
"time",
"daysinmonth",
"days_in_month",
dcherian marked this conversation as resolved.
Show resolved Hide resolved
"is_month_start",
"is_month_end",
"is_quarter_start",
Expand All @@ -74,7 +76,18 @@ def test_field_access(self, field) -> None:
else:
data = getattr(self.times, field)

expected = xr.DataArray(data, name=field, coords=[self.times], dims=["time"])
if data.dtype.kind != "b" and field not in ("date", "time"):
# pandas 2.0 returns int32 for integer fields now
data = data.astype("int64")
dcherian marked this conversation as resolved.
Show resolved Hide resolved

translations = {
"weekday": "dayofweek",
"daysinmonth": "days_in_month",
"weekofyear": "week",
}
name = translations.get(field, field)

expected = xr.DataArray(data, name=name, coords=[self.times], dims=["time"])

if field in ["week", "weekofyear"]:
with pytest.warns(
Expand All @@ -84,7 +97,8 @@ def test_field_access(self, field) -> None:
else:
actual = getattr(self.data.time.dt, field)

assert_equal(expected, actual)
assert expected.dtype == actual.dtype
assert_identical(expected, actual)

@pytest.mark.parametrize(
"field, pandas_field",
Expand Down
63 changes: 42 additions & 21 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,32 +1023,53 @@ def test_sel_dataarray_datetime_slice(self) -> None:
result = array.sel(delta=slice(array.delta[0], array.delta[-1]))
assert_equal(result, array)

def test_sel_float(self) -> None:
@pytest.mark.parametrize(
["coord_values", "indices"],
(
pytest.param(
np.array([0.0, 0.111, 0.222, 0.333], dtype="float64"),
slice(1, 3),
id="float64",
),
pytest.param(
np.array([0.0, 0.111, 0.222, 0.333], dtype="float32"),
slice(1, 3),
id="float32",
),
pytest.param(
np.array([0.0, 0.111, 0.222, 0.333], dtype="float32"), [2], id="scalar"
),
),
)
def test_sel_float(self, coord_values, indices) -> None:
data_values = np.arange(4)

# case coords are float32 and label is list of floats
float_values = [0.0, 0.111, 0.222, 0.333]
coord_values = np.asarray(float_values, dtype="float32")
array = DataArray(data_values, [("float32_coord", coord_values)])
expected = DataArray(data_values[1:3], [("float32_coord", coord_values[1:3])])
actual = array.sel(float32_coord=float_values[1:3])
# case coords are float16 and label is list of floats
coord_values_16 = np.asarray(float_values, dtype="float16")
expected_16 = DataArray(
data_values[1:3], [("float16_coord", coord_values_16[1:3])]
)
array_16 = DataArray(data_values, [("float16_coord", coord_values_16)])
actual_16 = array_16.sel(float16_coord=float_values[1:3])
arr = DataArray(data_values, coords={"x": coord_values}, dims="x")

# case coord, label are scalars
expected_scalar = DataArray(
data_values[2], coords={"float32_coord": coord_values[2]}
actual = arr.sel(x=coord_values[indices])
expected = DataArray(
data_values[indices], coords={"x": coord_values[indices]}, dims="x"
)
actual_scalar = array.sel(float32_coord=float_values[2])

assert_equal(expected, actual)
assert_equal(expected_scalar, actual_scalar)
assert_equal(expected_16, actual_16)
assert_equal(actual, expected)

def test_sel_float16(self) -> None:
    """Check label-based selection on a ``float16`` coordinate.

    Constructing a ``DataArray`` with ``float16`` coordinate values is
    expected to emit a ``DeprecationWarning``; the selection result must
    still equal the directly constructed expectation.
    """
    coords = np.array([0.0, 0.111, 0.222, 0.333], dtype="float16")
    values = np.arange(4)
    window = slice(1, 3)

    warning_pattern = "`pandas.Index` does not support the `float16` dtype.*"

    # Both constructions receive float16 coordinates, so each one is wrapped
    # separately to assert that the warning is raised every time.
    with pytest.warns(DeprecationWarning, match=warning_pattern):
        arr = DataArray(values, coords={"x": coords}, dims="x")
    with pytest.warns(DeprecationWarning, match=warning_pattern):
        expected = DataArray(
            values[window], coords={"x": coords[window]}, dims="x"
        )

    # Select using the float16 labels themselves.
    actual = arr.sel(x=coords[window])

    assert_equal(actual, expected)

def test_sel_float_multiindex(self) -> None:
# regression test https://github.com/pydata/xarray/issues/5691
Expand Down
8 changes: 5 additions & 3 deletions xarray/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ def new_method():


@pytest.mark.parametrize(
"a, b, expected", [["a", "b", np.array(["a", "b"])], [1, 2, pd.Index([1, 2])]]
["a", "b", "expected"],
[
[np.array(["a"]), np.array(["b"]), np.array(["a", "b"])],
[np.array([1], dtype="int64"), np.array([2], dtype="int64"), pd.Index([1, 2])],
Comment on lines +28 to +29
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, the idea is to avoid the different default integer precision on Windows by explicitly setting the integer dtype (`int64`) when constructing the arrays.

],
)
def test_maybe_coerce_to_str(a, b, expected):
a = np.array([a])
b = np.array([b])
index = pd.Index(a).append(pd.Index(b))

actual = utils.maybe_coerce_to_str(index, [a, b])
Expand Down