From e4557cbcf803865ff9333b0c6fa45c966b530518 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 10 Mar 2023 13:30:36 -0600 Subject: [PATCH] Update minimum `pandas` and `numpy` pinnings (#12887) This PR: - [x] Increments the minimum pinning for `pandas` version from `1.0` to `1.3`. - [x] Sets a minimum pinning for `numpy` as `>=1.21` - [x] Fixes arm conda environment creation by removing `pandoc` version constraint. Resolves #12785. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) - Ray Douglass (https://github.com/raydouglass) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/12887 --- .../all_cuda-118_arch-x86_64.yaml | 6 +- conda/recipes/cudf/meta.yaml | 4 +- dependencies.yaml | 6 +- python/cudf/cudf/core/_compat.py | 4 - python/cudf/cudf/core/column/datetime.py | 6 +- python/cudf/cudf/core/dtypes.py | 12 +- python/cudf/cudf/core/multiindex.py | 4 +- python/cudf/cudf/testing/testing.py | 36 ++--- python/cudf/cudf/tests/test_categorical.py | 3 +- python/cudf/cudf/tests/test_dataframe.py | 25 +-- python/cudf/cudf/tests/test_dtypes.py | 8 +- python/cudf/cudf/tests/test_groupby.py | 13 +- python/cudf/cudf/tests/test_index.py | 33 +--- python/cudf/cudf/tests/test_indexing.py | 23 +-- python/cudf/cudf/tests/test_joining.py | 8 +- python/cudf/cudf/tests/test_json.py | 14 +- python/cudf/cudf/tests/test_multiindex.py | 3 +- python/cudf/cudf/tests/test_repr.py | 7 - python/cudf/cudf/tests/test_reshape.py | 15 +- python/cudf/cudf/tests/test_rolling.py | 38 ++--- python/cudf/cudf/tests/test_series.py | 11 +- python/cudf/cudf/tests/test_setitem.py | 6 +- python/cudf/cudf/tests/test_string.py | 150 ++++++------------ python/cudf/cudf/tests/test_timedelta.py | 49 +----- python/cudf/cudf/utils/dtypes.py | 17 +- python/cudf/pyproject.toml | 4 +- .../dask_cudf/dask_cudf/tests/test_groupby.py | 17 +- python/dask_cudf/pyproject.toml | 8 +- 28 files changed, 131 insertions(+), 399 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 67e2dc4720e..6f9734eb314 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -41,13 +41,13 @@ dependencies: - ninja - notebook - numba>=0.56.2 -- numpy +- numpy>=1.21 - numpydoc - nvcc_linux-64=11.8 - nvtx>=0.2.1 - packaging -- pandas>=1.0,<1.6.0dev0 -- pandoc<=2.0.0 +- pandas>=1.3,<1.6.0dev0 +- pandoc - pip - pre-commit - protobuf>=4.21.6,<4.22 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index e0f33ad40c7..6b23c8953d3 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -62,10 +62,10 @@ requirements: - protobuf >=4.21.6,<4.22 - python - typing_extensions - - pandas >=1.0,<1.6.0dev0 + - pandas >=1.3,<1.6.0dev0 - cupy >=9.5.0,<12.0.0a0 - numba >=0.56.2 - - numpy + - numpy >=1.21 - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf {{ version }} - fastavro >=0.22.0 diff --git a/dependencies.yaml b/dependencies.yaml index 4bac8148b10..48b5bfe53d4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -152,7 +152,7 @@ dependencies: - myst-nb - nbsphinx - numpydoc - - pandoc<=2.0.0 # We should check and fix all "<=" pinnings + - pandoc - pydata-sphinx-theme - sphinx - sphinx-autobuild @@ -254,10 +254,10 @@ dependencies: - distributed>=2023.1.1 - fsspec>=0.6.0 - numba>=0.56.2 - - numpy + - numpy>=1.21 - nvtx>=0.2.1 - packaging - - pandas>=1.0,<1.6.0dev0 + - pandas>=1.3,<1.6.0dev0 - python-confluent-kafka=1.7.0 - streamz - typing_extensions diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 722f9677db0..6ecbe414ebb 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -4,10 +4,6 @@ from packaging import version PANDAS_VERSION = version.parse(pd.__version__) -PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1") -PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2") -PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2") -PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0") PANDAS_GE_133 = PANDAS_VERSION >= version.parse("1.3.3") PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4") PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0") diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 0c546168fe3..14aa7bdd84b 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -21,16 +21,12 @@ ScalarLike, ) from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype -from cudf.core._compat import PANDAS_GE_120 from cudf.core.buffer import Buffer, cuda_array_interface_wrapper from cudf.core.column import ColumnBase, as_column, column, string from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion from cudf.utils.utils import _fillna_natwise -if PANDAS_GE_120: - _guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format -else: - _guess_datetime_format = pd.core.tools.datetimes._guess_datetime_format +_guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format # nanoseconds per time_unit _dtype_to_format_conversion = { diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 963f13acf10..d6edd6af093 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -19,7 +19,7 @@ import cudf from cudf._typing import Dtype -from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150 +from cudf.core._compat import PANDAS_GE_150 from cudf.core.abc import Serializable from cudf.core.buffer import Buffer from cudf.utils.docutils import doc_apply @@ -875,16 +875,10 @@ def to_arrow(self): @classmethod def from_pandas(cls, pd_dtype: pd.IntervalDtype) -> "IntervalDtype": - if PANDAS_GE_130: - return cls(subtype=pd_dtype.subtype, closed=pd_dtype.closed) - else: - return cls(subtype=pd_dtype.subtype) + return cls(subtype=pd_dtype.subtype, closed=pd_dtype.closed) def to_pandas(self) -> pd.IntervalDtype: - if PANDAS_GE_130: - return pd.IntervalDtype(subtype=self.subtype, closed=self.closed) - else: - return pd.IntervalDtype(subtype=self.subtype) + return pd.IntervalDtype(subtype=self.subtype, closed=self.closed) def __eq__(self, other): if isinstance(other, str): diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 1ce4cc218f8..4a9bc89fa34 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -21,7 +21,7 @@ from cudf._typing import DataFrameOrSeries from cudf.api.types import is_integer, is_list_like, is_object_dtype from cudf.core import column -from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150 +from cudf.core._compat import PANDAS_GE_150 from cudf.core.frame import Frame from cudf.core.index import ( BaseIndex, @@ -495,7 +495,7 @@ def __repr__(self): ) ) - if PANDAS_GE_120 and not PANDAS_GE_150: + if not PANDAS_GE_150: # Need this whole `if` block, # this is a workaround for the following issue: # https://github.com/pandas-dev/pandas/issues/39984 diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index a8428c2647b..484c013f774 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from __future__ import annotations @@ -19,7 +19,6 @@ is_string_dtype, is_struct_dtype, ) -from cudf.core._compat import PANDAS_GE_110 from cudf.core.missing import NA @@ -699,28 +698,17 @@ def assert_frame_equal( obj=f"{obj}.index", ) - if PANDAS_GE_110: - pd.testing.assert_index_equal( - left._data.to_pandas_index(), - right._data.to_pandas_index(), - exact=check_column_type, - check_names=check_names, - check_exact=check_exact, - check_categorical=check_categorical, - rtol=rtol, - atol=atol, - obj=f"{obj}.columns", - ) - else: - pd.testing.assert_index_equal( - left._data.to_pandas_index(), - right._data.to_pandas_index(), - exact=check_column_type, - check_names=check_names, - check_exact=check_exact, - check_categorical=check_categorical, - obj=f"{obj}.columns", - ) + pd.testing.assert_index_equal( + left._data.to_pandas_index(), + right._data.to_pandas_index(), + exact=check_column_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + rtol=rtol, + atol=atol, + obj=f"{obj}.columns", + ) for col in left._column_names: assert_column_equal( diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 496039ca2f8..2c8226e4fe5 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -11,7 +11,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_134 +from cudf.core._compat import PANDAS_GE_134 from cudf.testing._utils import ( NUMERIC_TYPES, assert_eq, @@ -81,7 +81,6 @@ def test_categorical_basic(): assert_eq(cat.codes, cudf_cat.codes.to_numpy()) -@pytest.mark.skipif(not PANDAS_GE_110, reason="requires pandas>=1.1.0") def test_categorical_integer(): cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"]) pdsr = pd.Series(cat) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 7ddfa3a7f48..6a79555d43e 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -22,13 +22,7 @@ from packaging import version import cudf -from cudf.core._compat import ( - PANDAS_GE_110, - PANDAS_GE_120, - PANDAS_GE_134, - PANDAS_GE_150, - PANDAS_LT_140, -) +from cudf.core._compat import PANDAS_GE_134, PANDAS_GE_150, PANDAS_LT_140 from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.testing import _utils as utils @@ -3227,10 +3221,6 @@ def test_dataframe_reindex_fill_value( @pytest.mark.parametrize("copy", [True, False]) def test_dataframe_reindex_change_dtype(copy): - if PANDAS_GE_110: - kwargs = {"check_freq": False} - else: - kwargs = {} index = pd.date_range("12/29/2009", periods=10, freq="D") columns = ["a", "b", "c", "d", "e"] gdf = cudf.datasets.randomdata( @@ -3242,7 +3232,7 @@ def test_dataframe_reindex_change_dtype(copy): assert_eq( pdf.reindex(index=index, columns=columns, copy=True), gdf.reindex(index=index, columns=columns, copy=copy), - **kwargs, + check_freq=False, ) @@ -4632,10 +4622,6 @@ def test_isin_dataframe(data, values): else: try: expected = pdf.isin(values) - except ValueError as e: - if str(e) == "Lengths must match." and not PANDAS_GE_110: - # https://github.com/pandas-dev/pandas/issues/34256 - return except TypeError as e: # Can't do isin with different categories if str(e) == ( @@ -5302,12 +5288,7 @@ def test_rowwise_ops_datetime_dtypes_pdbug(data): expected = pdf.max(axis=1, skipna=False) got = gdf.max(axis=1, skipna=False) - if PANDAS_GE_120: - assert_eq(got, expected) - else: - # PANDAS BUG: https://github.com/pandas-dev/pandas/issues/36907 - with pytest.raises(AssertionError, match="numpy array are different"): - assert_eq(got, expected) + assert_eq(got, expected) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py index 2f8e1ac5c2f..6e24099f1a8 100644 --- a/python/cudf/cudf/tests/test_dtypes.py +++ b/python/cudf/cudf/tests/test_dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -6,7 +6,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150 +from cudf.core._compat import PANDAS_GE_150 from cudf.core.column import ColumnBase from cudf.core.dtypes import ( CategoricalDtype, @@ -187,10 +187,6 @@ def test_interval_dtype_pyarrow_round_trip(subtype, closed): assert expect.equals(got) -@pytest.mark.skipif( - not PANDAS_GE_130, - reason="pandas<1.3.0 doesn't have a closed argument for IntervalDtype", -) def test_interval_dtype_from_pandas(subtype, closed): expect = cudf.IntervalDtype(subtype, closed=closed) pd_type = pd.IntervalDtype(subtype, closed=closed) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 97700779a89..0751ef7ca67 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -15,12 +15,7 @@ import cudf from cudf import DataFrame, Series -from cudf.core._compat import ( - PANDAS_GE_110, - PANDAS_GE_130, - PANDAS_GE_150, - PANDAS_LT_140, -) +from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140 from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES from cudf.testing._utils import ( DATETIME_TYPES, @@ -573,7 +568,7 @@ def test_groupby_2keys_agg(nelem, func): # "func", ["min", "max", "idxmin", "idxmax", "count", "sum"], ) @pytest.mark.xfail( - condition=PANDAS_GE_130 and PANDAS_LT_140, + condition=PANDAS_LT_140, reason="https://github.com/pandas-dev/pandas/issues/43209", ) def test_groupby_agg_decimal(num_groups, nelem_per_group, func): @@ -1507,9 +1502,6 @@ def test_groupby_median(agg, by): @pytest.mark.parametrize("agg", [lambda x: x.nunique(), "nunique"]) @pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) -@pytest.mark.xfail( - condition=not PANDAS_GE_110, reason="pandas >= 1.1 required" -) def test_groupby_nunique(agg, by): pdf = pd.DataFrame( {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]} @@ -1545,7 +1537,6 @@ def test_groupby_nth(n, by): @pytest.mark.xfail( - condition=PANDAS_GE_130, reason="https://github.com/pandas-dev/pandas/issues/43209", ) def test_raise_data_error(): diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 29601cbd203..d043b917251 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -11,7 +11,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_133, PANDAS_GE_200 +from cudf.core._compat import PANDAS_GE_133, PANDAS_GE_200 from cudf.core.index import ( CategoricalIndex, DatetimeIndex, @@ -811,17 +811,6 @@ def test_index_difference(data, other, sort): gd_data = cudf.core.index.as_index(data) gd_other = cudf.core.index.as_index(other) - if ( - gd_data.dtype.kind == "f" - and gd_other.dtype.kind != "f" - or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f") - ): - pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="Bug in Pandas: " - "https://github.com/pandas-dev/pandas/issues/35217", - ) - expected = pd_data.difference(pd_other, sort=sort) actual = gd_data.difference(gd_other, sort=sort) assert_eq(expected, actual) @@ -880,15 +869,6 @@ def test_index_equals(data, other): gd_data = cudf.core.index.as_index(data) gd_other = cudf.core.index.as_index(other) - if ( - gd_data.dtype.kind == "f" or gd_other.dtype.kind == "f" - ) and cudf.utils.dtypes.is_mixed_with_object_dtype(gd_data, gd_other): - pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="Bug in Pandas: " - "https://github.com/pandas-dev/pandas/issues/35217", - ) - expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) assert_eq(expected, actual) @@ -935,17 +915,6 @@ def test_index_categories_equal(data, other): gd_data = cudf.core.index.as_index(data).astype("category") gd_other = cudf.core.index.as_index(other) - if ( - gd_data.dtype.kind == "f" - and gd_other.dtype.kind != "f" - or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f") - ): - pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="Bug in Pandas: " - "https://github.com/pandas-dev/pandas/issues/35217", - ) - expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 634466e92a3..5012ae0979f 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -8,7 +8,6 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120 from cudf.testing import _utils as utils from cudf.testing._utils import ( INTEGER_TYPES, @@ -451,10 +450,6 @@ def test_series_loc_string(): def test_series_loc_datetime(): - if PANDAS_GE_110: - kwargs = {"check_freq": False} - else: - kwargs = {} ps = pd.Series( [1, 2, 3, 4, 5], index=pd.date_range("20010101", "20010105") ) @@ -475,11 +470,11 @@ def test_series_loc_datetime(): assert_eq( ps.loc["2001-01-02":"2001-01-05"], gs.loc["2001-01-02":"2001-01-05"], - **kwargs, + check_freq=False, ) - assert_eq(ps.loc["2001-01-02":], gs.loc["2001-01-02":], **kwargs) - assert_eq(ps.loc[:"2001-01-04"], gs.loc[:"2001-01-04"], **kwargs) - assert_eq(ps.loc[::2], gs.loc[::2], **kwargs) + assert_eq(ps.loc["2001-01-02":], gs.loc["2001-01-02":], check_freq=False) + assert_eq(ps.loc[:"2001-01-04"], gs.loc[:"2001-01-04"], check_freq=False) + assert_eq(ps.loc[::2], gs.loc[::2], check_freq=False) assert_eq( ps.loc[["2001-01-01", "2001-01-04", "2001-01-05"]], @@ -505,13 +500,15 @@ def test_series_loc_datetime(): assert_eq( ps.loc[[True, False, True, False, True]], gs.loc[[True, False, True, False, True]], - **kwargs, + check_freq=False, ) just_less_than_max = ps.index.max() - pd.Timedelta("5m") assert_eq( - ps.loc[:just_less_than_max], gs.loc[:just_less_than_max], **kwargs + ps.loc[:just_less_than_max], + gs.loc[:just_less_than_max], + check_freq=False, ) @@ -1012,10 +1009,6 @@ def test_series_setitem_datetime(): assert_eq(psr, gsr) -@pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="Pandas will coerce to object datatype here", -) def test_series_setitem_datetime_coerced(): psr = pd.Series(["2001", "2002", "2003"], dtype="datetime64[ns]") gsr = cudf.from_pandas(psr) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index 94da7a50c2e..b197e91882a 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. from itertools import combinations, product, repeat @@ -7,7 +7,6 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_120 from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import ( INTEGER_TYPES, @@ -545,11 +544,6 @@ def test_empty_joins(how, left_empty, right_empty): assert len(expected) == len(result) -@pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="left_on/right_on produces undefined results with 0" - "index and is disabled", -) def test_merge_left_index_zero(): left = pd.DataFrame({"x": [1, 2, 3, 4, 5, 6]}, index=[0, 1, 2, 3, 4, 5]) right = pd.DataFrame( diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index b778db4465f..8dcab37d20a 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -13,7 +13,6 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_110 from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -165,18 +164,7 @@ def test_json_writer(tmpdir, pdf, gdf): assert os.path.exists(pdf_series_fname) assert os.path.exists(gdf_series_fname) - try: - # xref 'https://github.com/pandas-dev/pandas/pull/33373' - expect_series = pd.read_json(pdf_series_fname, typ="series") - except TypeError as e: - if ( - not PANDAS_GE_110 - and str(e) == " is not convertible to datetime" - ): - continue - else: - raise e - + expect_series = pd.read_json(pdf_series_fname, typ="series") got_series = pd.read_json(gdf_series_fname, typ="series") assert_eq(expect_series, got_series) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index bd9f36a595d..0f04e8c0f2d 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -16,7 +16,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_200 +from cudf.core._compat import PANDAS_GE_200 from cudf.core.column import as_column from cudf.core.index import as_index from cudf.testing._utils import ( @@ -1102,7 +1102,6 @@ def test_multicolumn_loc(pdf, pdfIndex): @pytest.mark.xfail( - condition=PANDAS_GE_130, reason="https://github.com/pandas-dev/pandas/issues/43351", ) def test_multicolumn_set_item(pdf, pdfIndex): diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index bae0fde6463..e7fa401f1ec 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -9,7 +9,6 @@ from hypothesis import given, settings, strategies as st import cudf -from cudf.core._compat import PANDAS_GE_110 from cudf.testing import _utils as utils from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes @@ -600,9 +599,6 @@ def test_series_null_index_repr(sr, pandas_special_case): ], ) @pytest.mark.parametrize("dtype", ["timedelta64[s]", "timedelta64[us]"]) -@pytest.mark.xfail( - condition=not PANDAS_GE_110, reason="pandas >= 1.1 required" -) def test_timedelta_series_s_us_repr(data, dtype): sr = cudf.Series(data, dtype=dtype) psr = sr.to_pandas() @@ -1103,9 +1099,6 @@ def test_timedelta_dataframe_repr(df, expected_repr): ), ], ) -@pytest.mark.xfail( - condition=not PANDAS_GE_110, reason="pandas >= 1.1 required" -) def test_timedelta_index_repr(index, expected_repr): actual_repr = repr(index) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 37ffbab1676..78e95fdbd81 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -1,6 +1,7 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import re +from itertools import chain import numpy as np import pandas as pd @@ -8,7 +9,6 @@ import cudf from cudf import melt as cudf_melt -from cudf.core._compat import PANDAS_GE_120 from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing._utils import ( ALL_TYPES, @@ -86,16 +86,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): @pytest.mark.parametrize("num_cols", [1, 2, 10]) @pytest.mark.parametrize("num_rows", [1, 2, 1000]) @pytest.mark.parametrize( - "dtype", - list(NUMERIC_TYPES + DATETIME_TYPES) - + [ - pytest.param( - "str", - marks=pytest_xfail( - condition=not PANDAS_GE_120, reason="pandas bug" - ), - ) - ], + "dtype", list(chain(NUMERIC_TYPES, DATETIME_TYPES, ["str"])) ) @pytest.mark.parametrize("nulls", ["none", "some"]) def test_df_stack(nulls, num_cols, num_rows, dtype): diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 08188c25ffa..62120619d94 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import math from contextlib import contextmanager @@ -8,12 +8,7 @@ import pytest import cudf -from cudf.core._compat import ( - PANDAS_GE_110, - PANDAS_GE_130, - PANDAS_GE_150, - PANDAS_LT_140, -) +from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140 from cudf.testing._utils import _create_pandas_series, assert_eq from cudf.testing.dataset_generator import rand_dataframe @@ -48,10 +43,7 @@ def _hide_pandas_rolling_min_periods_warning(agg): @pytest.mark.parametrize("center", [True, False]) def test_rolling_series_basic(data, index, agg, nulls, center): rng = np.random.default_rng(1) - if PANDAS_GE_110: - kwargs = {"check_freq": False} - else: - kwargs = {} + if len(data) > 0: if nulls == "one": p = rng.integers(0, len(data)) @@ -73,7 +65,7 @@ def test_rolling_series_basic(data, index, agg, nulls, center): got = getattr( gsr.rolling(window_size, min_periods, center), agg )().fillna(-1) - assert_eq(expect, got, check_dtype=False, **kwargs) + assert_eq(expect, got, check_dtype=False, check_freq=False) @pytest.mark.parametrize( @@ -159,10 +151,6 @@ def test_rolling_with_offset(agg): @pytest.mark.parametrize("seed", [100, 2000]) @pytest.mark.parametrize("window_size", [2, 10, 100]) def test_rolling_var_std_large(agg, ddof, center, seed, window_size): - if PANDAS_GE_110: - kwargs = {"check_freq": False} - else: - kwargs = {} iupper_bound = math.sqrt(np.iinfo(np.int64).max / window_size) ilower_bound = -math.sqrt(abs(np.iinfo(np.int64).min) / window_size) @@ -214,15 +202,11 @@ def test_rolling_var_std_large(agg, ddof, center, seed, window_size): mask = (got[col].fillna(-1) != 0).to_pandas() expect[col] = expect[col][mask] got[col] = got[col][mask] - assert_eq(expect[col], got[col], **kwargs) + assert_eq(expect[col], got[col], check_freq=False) else: - assert_eq(expect, got, **kwargs) + assert_eq(expect, got, check_freq=False) -@pytest.mark.xfail( - condition=not PANDAS_GE_130, - reason="https://github.com/pandas-dev/pandas/issues/37051", -) def test_rolling_var_uniform_window(): """ Pandas adopts an online variance calculation algorithm. This gives a @@ -310,17 +294,17 @@ def test_rolling_getitem(): def test_rolling_getitem_window(): - if PANDAS_GE_110: - kwargs = {"check_freq": False} - else: - kwargs = {} index = pd.DatetimeIndex( pd.date_range("2000-01-01", "2000-01-02", freq="1h") ) pdf = pd.DataFrame({"x": np.arange(len(index))}, index=index) gdf = cudf.from_pandas(pdf) - assert_eq(pdf.rolling("2h").x.mean(), gdf.rolling("2h").x.mean(), **kwargs) + assert_eq( + pdf.rolling("2h").x.mean(), + gdf.rolling("2h").x.mean(), + check_freq=False, + ) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index ce519a445ba..7123069d5b8 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -13,7 +13,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_120, PANDAS_LT_140 +from cudf.core._compat import PANDAS_LT_140 from cudf.testing._utils import ( NUMERIC_TYPES, TIMEDELTA_TYPES, @@ -1842,14 +1842,7 @@ def test_isin_datetime(data, values): ["this", "is"], [None, None, None], ["12", "14", "19"], - pytest.param( - [12, 14, 19], - marks=pytest.mark.xfail( - not PANDAS_GE_120, - reason="pandas's failure here seems like a bug(in < 1.2) " - "given the reverse succeeds", - ), - ), + [12, 14, 19], ["is", "this", "is", "this", "is"], ], ) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index d59226ee17a..4d9ffc7cd81 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150, PANDAS_LE_122 +from cudf.core._compat import PANDAS_GE_150 from cudf.testing._utils import assert_eq, assert_exceptions_equal @@ -20,10 +20,6 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value): assert_eq(df, gdf) -@pytest.mark.xfail( - condition=PANDAS_GE_120 and PANDAS_LE_122, - reason="https://github.com/pandas-dev/pandas/issues/40204", -) def test_dataframe_setitem_scaler_bool(): df = pd.DataFrame({"a": [1, 2, 3]}) df[[True, False, True]] = pd.DataFrame({"a": [-1, -2]}) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 0e6ed444c32..10208611f13 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -15,7 +15,7 @@ import cudf from cudf import concat -from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_150 +from cudf.core._compat import PANDAS_GE_150 from cudf.core.column.string import StringColumn from cudf.core.index import StringIndex, as_index from cudf.testing._utils import ( @@ -415,20 +415,8 @@ def _cat_convert_seq_to_cudf(others): ("f", "g", "h", "i", "j"), pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pytest.param( - pd.Index(["f", "g", "h", "i", "j"]), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), - pytest.param( - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), ( np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -453,38 +441,26 @@ def _cat_convert_seq_to_cudf(others): pd.Series(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), ), - pytest.param( - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), - pytest.param( - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), ), + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], [ pd.Series(["hello", "world", "abc", "xyz", "pqr"]), pd.Series(["abc", "xyz", "hello", "pqr", "world"]), @@ -582,20 +558,8 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): ("f", "g", "h", "i", "j"), pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pytest.param( - pd.Index(["f", "g", "h", "i", "j"]), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), - pytest.param( - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), ( np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), @@ -608,38 +572,26 @@ def test_string_cat(ps_gs, others, sep, na_rep, index): pd.Series(["f", "g", "h", "i", "j"]), pd.Series(["f", "g", "h", "i", "j"]), ], - pytest.param( - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), - pytest.param( - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), ), + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], [ pd.Series( ["hello", "world", "abc", "xyz", "pqr"], @@ -701,20 +653,8 @@ def test_string_index_str_cat(data, others, sep, na_rep, name): None, ["f", "g", "h", "i", "j"], pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pytest.param( - pd.Index(["f", "g", "h", "i", "j"]), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), - pytest.param( - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_110, - reason="https://github.com/pandas-dev/pandas/issues/33436", - ), - ), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), [ np.array(["f", "g", "h", "i", "j"]), np.array(["f", "g", "h", "i", "j"]), diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 468773387c1..4b1e8cf1027 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -9,7 +9,6 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_120 from cudf.testing import _utils as utils from cudf.testing._utils import assert_eq, assert_exceptions_equal @@ -414,13 +413,7 @@ def test_timedelta_dataframe_ops(df, op): np.timedelta64(4, "s"), np.timedelta64(456, "D"), np.timedelta64(46, "h"), - pytest.param( - np.timedelta64("nat"), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="https://github.com/pandas-dev/pandas/issues/35529", - ), - ), + np.timedelta64("nat"), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), np.timedelta64(1, "us"), @@ -435,13 +428,7 @@ def test_timedelta_dataframe_ops(df, op): "sub", "truediv", "mod", - pytest.param( - "floordiv", - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="https://github.com/pandas-dev/pandas/issues/35529", - ), - ), + "floordiv", ], ) def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op): @@ -541,13 +528,7 @@ def test_timedelta_series_mod_with_scalar_zero(reverse): datetime.timedelta(seconds=768), datetime.timedelta(microseconds=7), np.timedelta64(4, "s"), - pytest.param( - np.timedelta64("nat", "s"), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="https://github.com/pandas-dev/pandas/issues/35529", - ), - ), + np.timedelta64("nat", "s"), np.timedelta64(1, "s"), np.timedelta64(1, "ms"), np.timedelta64(1, "us"), @@ -563,13 +544,7 @@ def test_timedelta_series_mod_with_scalar_zero(reverse): "sub", "truediv", "mod", - pytest.param( - "floordiv", - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="https://github.com/pandas-dev/pandas/issues/35529", - ), - ), + "floordiv", ], ) def test_timedelta_series_ops_with_cudf_scalars(data, cpu_scalar, dtype, op): @@ -858,13 +833,7 @@ def test_timedelta_datetime_index_ops_misc( "add", "sub", "truediv", - pytest.param( - "floordiv", - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="https://github.com/pandas-dev/pandas/issues/35529", - ), - ), + "floordiv", ], ) @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning:pandas") @@ -938,13 +907,7 @@ def test_timedelta_index_ops_with_scalars( "add", "sub", "truediv", - pytest.param( - "floordiv", - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="https://github.com/pandas-dev/pandas/issues/35529", - ), - ), + "floordiv", ], ) def test_timedelta_index_ops_with_cudf_scalars( diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 92c23d8b97b..acf00b3a3d5 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import datetime from collections import namedtuple @@ -12,7 +12,6 @@ import cudf from cudf.api.types import is_bool, is_float, is_integer -from cudf.core._compat import PANDAS_GE_120 from cudf.core.missing import NA _NA_REP = "" @@ -90,13 +89,13 @@ "boolean": "bool", } -if PANDAS_GE_120: - np_dtypes_to_pandas_dtypes[np.dtype("float32")] = pd.Float32Dtype() - np_dtypes_to_pandas_dtypes[np.dtype("float64")] = pd.Float64Dtype() - pandas_dtypes_to_np_dtypes[pd.Float32Dtype()] = np.dtype("float32") - pandas_dtypes_to_np_dtypes[pd.Float64Dtype()] = np.dtype("float64") - pandas_dtypes_alias_to_cudf_alias["Float32"] = "float32" - pandas_dtypes_alias_to_cudf_alias["Float64"] = "float64" + +np_dtypes_to_pandas_dtypes[np.dtype("float32")] = pd.Float32Dtype() +np_dtypes_to_pandas_dtypes[np.dtype("float64")] = pd.Float64Dtype() +pandas_dtypes_to_np_dtypes[pd.Float32Dtype()] = np.dtype("float32") +pandas_dtypes_to_np_dtypes[pd.Float64Dtype()] = np.dtype("float64") +pandas_dtypes_alias_to_cudf_alias["Float32"] = "float32" +pandas_dtypes_alias_to_cudf_alias["Float64"] = "float64" SIGNED_INTEGER_TYPES = {"int8", "int16", "int32", "int64"} UNSIGNED_TYPES = {"uint8", "uint16", "uint32", "uint64"} diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index ca14ccfc63e..5b259b1dc66 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -31,10 +31,10 @@ dependencies = [ "cuda-python>=11.7.1,<12.0", "fsspec>=0.6.0", "numba>=0.56.2", - "numpy", + "numpy>=1.21", "nvtx>=0.2.1", "packaging", - "pandas>=1.0,<1.6.0dev0", + "pandas>=1.3,<1.6.0dev0", "protobuf>=4.21.6,<4.22", "typing_extensions", # Allow floating minor versions for Arrow. diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 741dbc28e6c..cfb951901d3 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import contextlib @@ -11,7 +11,6 @@ from dask.utils_test import hlg_layer import cudf -from cudf.core._compat import PANDAS_GE_120 import dask_cudf from dask_cudf.groupby import OPTIMIZED_AGGS, _aggs_optimized @@ -160,18 +159,8 @@ def test_groupby_agg_empty_partition(tmpdir, split_out): @pytest.mark.parametrize( "func", [ - pytest.param( - lambda df: df.groupby(["a", "b"]).x.sum(), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, reason="pandas bug" - ), - ), - pytest.param( - lambda df: df.groupby(["a", "b"]).sum(), - marks=pytest.mark.xfail( - condition=not PANDAS_GE_120, reason="pandas bug" - ), - ), + lambda df: df.groupby(["a", "b"]).x.sum(), + lambda df: df.groupby(["a", "b"]).sum(), pytest.param( lambda df: df.groupby(["a", "b"]).agg({"x", "sum"}), marks=pytest.mark.xfail, diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 07b0edb6008..79a9aca9e96 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -21,8 +21,8 @@ dependencies = [ "dask>=2023.1.1", "distributed>=2023.1.1", "fsspec>=0.6.0", - "numpy", - "pandas>=1.0,<1.6.0dev0", + "numpy>=1.21", + "pandas>=1.3,<1.6.0dev0", "cudf==23.4.*", "cupy-cuda11x", ] @@ -40,8 +40,8 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ - "numpy", - "pandas>=1.0,<1.6.0dev0", + "numpy>=1.21", + "pandas>=1.3,<1.6.0dev0", "pytest", "pytest-xdist", "numba>=0.56.2",