From fe9642b55b2d076d2c361f2b6011a0d484b4fe04 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 13 Mar 2024 07:43:19 -0500 Subject: [PATCH] Change cross-pandas-version testing in `cudf` (#15145) This PR removes redundant version checks in a lot of pytests. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/15145 --- docs/cudf/source/developer_guide/testing.md | 19 +++++ python/cudf/cudf/core/_compat.py | 8 +- .../cudf/cudf/tests/indexes/test_interval.py | 8 +- python/cudf/cudf/tests/test_api_types.py | 29 +++++-- python/cudf/cudf/tests/test_applymap.py | 10 ++- python/cudf/cudf/tests/test_array_ufunc.py | 20 +++-- python/cudf/cudf/tests/test_binops.py | 8 +- python/cudf/cudf/tests/test_csv.py | 14 ++-- python/cudf/cudf/tests/test_dataframe.py | 18 ++--- python/cudf/cudf/tests/test_datetime.py | 77 +++++++----------- python/cudf/cudf/tests/test_groupby.py | 37 ++++++--- python/cudf/cudf/tests/test_indexing.py | 29 ++++--- python/cudf/cudf/tests/test_interpolate.py | 20 +++-- python/cudf/cudf/tests/test_join_order.py | 5 +- python/cudf/cudf/tests/test_joining.py | 18 ++--- python/cudf/cudf/tests/test_json.py | 41 +++++----- python/cudf/cudf/tests/test_orc.py | 19 ++--- python/cudf/cudf/tests/test_parquet.py | 80 ++++++++----------- python/cudf/cudf/tests/test_replace.py | 36 +++++---- python/cudf/cudf/tests/test_resampling.py | 5 +- python/cudf/cudf/tests/test_reshape.py | 6 +- python/cudf/cudf/tests/test_series.py | 8 +- python/cudf/cudf/tests/test_setitem.py | 8 +- python/cudf/cudf/tests/test_sorting.py | 8 +- python/cudf/cudf/tests/test_stats.py | 9 ++- python/cudf/cudf/tests/test_udf_masked_ops.py | 15 ++-- 26 files changed, 310 insertions(+), 245 deletions(-) diff --git a/docs/cudf/source/developer_guide/testing.md b/docs/cudf/source/developer_guide/testing.md index e3939724189..a28a6b9192d 100644 
--- a/docs/cudf/source/developer_guide/testing.md +++ b/docs/cudf/source/developer_guide/testing.md @@ -249,3 +249,22 @@ In particular: - `testing._utils.assert_eq` is the biggest hammer to reach for. It can be used to compare any pair of objects. - For comparing specific objects, use `testing.testing.assert_[frame|series|index]_equal`. - For verifying that the expected assertions are raised, use `testing._utils.assert_exceptions_equal`. + + +### Version testing + +It is recommended that `cudf` pytests be written to pass only on the latest supported pandas version, i.e., `PANDAS_CURRENT_SUPPORTED_VERSION`. Any anticipated failures should be marked as either skipped (`skipif`) or expected to fail (`xfail`). + +For example: + +```python +@pytest.mark.skipif(PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="bug in older version of pandas") +def test_bug_from_older_pandas_versions(...): + ... + +@pytest.mark.xfail(PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, reason="bug in latest version of pandas") +def test_bug_in_current_and_maybe_future_versions(...): + ... +``` + +If pandas makes a bugfix release that fixes the bug, we'll see it in CI immediately, patch the affected test, and bump `PANDAS_CURRENT_SUPPORTED_VERSION`, which usually also happens during pandas upgrades. 
diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 7fcb353a800..fba3a98e56d 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -3,12 +3,10 @@ import pandas as pd from packaging import version +PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.1") PANDAS_VERSION = version.parse(pd.__version__) -PANDAS_EQ_200 = PANDAS_VERSION == version.parse("2.0.0") -PANDAS_GE_200 = PANDAS_VERSION >= version.parse("2.0.0") -PANDAS_GE_201 = PANDAS_VERSION >= version.parse("2.0.1") + + PANDAS_GE_210 = PANDAS_VERSION >= version.parse("2.1.0") -PANDAS_GE_214 = PANDAS_VERSION >= version.parse("2.1.4") -PANDAS_LT_203 = PANDAS_VERSION < version.parse("2.0.3") PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0") PANDAS_LT_300 = PANDAS_VERSION < version.parse("3.0.0") diff --git a/python/cudf/cudf/tests/indexes/test_interval.py b/python/cudf/cudf/tests/indexes/test_interval.py index 365465db1e1..d59041e32d5 100644 --- a/python/cudf/cudf/tests/indexes/test_interval.py +++ b/python/cudf/cudf/tests/indexes/test_interval.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.index import IntervalIndex, interval_range from cudf.testing._utils import assert_eq @@ -315,8 +315,8 @@ def test_interval_index_from_breaks(closed): 1.0, 0.2, None, - marks=pytest.mark.xfail( - condition=not PANDAS_GE_210, + marks=pytest.mark.skipif( + condition=PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="https://github.com/pandas-dev/pandas/pull/54477", ), ), @@ -327,7 +327,7 @@ def test_interval_index_from_breaks(closed): 0.1, None, marks=pytest.mark.xfail( - condition=not PANDAS_GE_210, + condition=PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="https://github.com/pandas-dev/pandas/pull/54477", ), ), diff --git a/python/cudf/cudf/tests/test_api_types.py 
b/python/cudf/cudf/tests/test_api_types.py index 6cb267ae0e8..9436d65e0b7 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -7,8 +7,7 @@ import cudf from cudf.api import types -from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_214, PANDAS_GE_220 -from cudf.testing._utils import expect_warning_if +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION @pytest.mark.parametrize( @@ -499,8 +498,22 @@ def test_is_integer(obj, expect): (pd.Series(dtype="int"), False), (pd.Series(dtype="float"), False), (pd.Series(dtype="complex"), False), - (pd.Series(dtype="str"), PANDAS_GE_220), - (pd.Series(dtype="unicode"), PANDAS_GE_220), + pytest.param( + pd.Series(dtype="str"), + True, + marks=pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="bug in previous pandas versions", + ), + ), + pytest.param( + pd.Series(dtype="unicode"), + True, + marks=pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="bug in previous pandas versions", + ), + ), (pd.Series(dtype="datetime64[s]"), False), (pd.Series(dtype="timedelta64[s]"), False), (pd.Series(dtype="category"), False), @@ -964,6 +977,10 @@ def test_is_decimal_dtype(obj, expect): assert types.is_decimal_dtype(obj) == expect +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="inconsistent warnings in older pandas versions", +) @pytest.mark.parametrize( "obj", ( @@ -1037,9 +1054,7 @@ def test_is_decimal_dtype(obj, expect): ), ) def test_pandas_agreement(obj): - with expect_warning_if( - PANDAS_GE_210, DeprecationWarning if PANDAS_GE_214 else FutureWarning - ): + with pytest.warns(DeprecationWarning): expected = pd_types.is_categorical_dtype(obj) with pytest.warns(DeprecationWarning): actual = types.is_categorical_dtype(obj) diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index cfe4237180e..d720e6ce2ce 100644 --- 
a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -3,10 +3,14 @@ import pytest from cudf import NA, DataFrame -from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import _utils as utils +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in all versions of pandas", +) @pytest.mark.parametrize( "data", [ @@ -29,7 +33,7 @@ def test_applymap_dataframe(data, func, na_action, request): request.applymarker( pytest.mark.xfail( - PANDAS_GE_220 + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION and request.node.callspec.id == "None-2-data3", reason="https://github.com/pandas-dev/pandas/issues/57390", ) @@ -37,7 +41,7 @@ def test_applymap_dataframe(data, func, na_action, request): gdf = DataFrame(data) pdf = gdf.to_pandas(nullable=True) - with utils.expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expect = pdf.applymap(func, na_action=na_action) with pytest.warns(FutureWarning): got = gdf.applymap(func, na_action=na_action) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index 0eb1d6de3a4..b036c1f13f3 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -10,7 +10,11 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_210, PANDAS_LT_300 +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_LT_300, + PANDAS_VERSION, +) from cudf.testing._utils import ( assert_eq, expect_warning_if, @@ -143,6 +147,10 @@ def test_binary_ufunc_index_array(ufunc, reflect): assert_eq(got, expect, check_exact=False) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize("ufunc", _UFUNCS) @pytest.mark.parametrize("has_nulls", [True, 
False]) @pytest.mark.parametrize("indexed", [True, False]) @@ -231,8 +239,7 @@ def test_ufunc_series(request, ufunc, has_nulls, indexed): else: if has_nulls: with expect_warning_if( - PANDAS_GE_210 - and fname + fname in ( "isfinite", "isinf", @@ -351,6 +358,10 @@ def test_ufunc_cudf_series_error_with_out_kwarg(func): # Skip matmul since it requires aligned shapes. +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize("ufunc", (uf for uf in _UFUNCS if uf != np.matmul)) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("indexed", [True, False]) @@ -431,8 +442,7 @@ def test_ufunc_dataframe(request, ufunc, has_nulls, indexed): else: if has_nulls: with expect_warning_if( - PANDAS_GE_210 - and fname + fname in ( "isfinite", "isinf", diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 75b393f513a..438f3e35ec8 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -13,7 +13,7 @@ import cudf from cudf import Series -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.index import as_index from cudf.testing import _utils as utils @@ -829,7 +829,7 @@ def test_operator_func_series_and_scalar_logical( ): request.applymarker( pytest.mark.xfail( - PANDAS_GE_220 + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION and fill_value == 1.0 and scalar is np.nan and (has_nulls or (not has_nulls and func not in {"eq", "ne"})), @@ -1719,7 +1719,7 @@ def test_datetime_dateoffset_binaryop( ): request.applymarker( pytest.mark.xfail( - PANDAS_GE_220 + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION and dtype in {"datetime64[ms]", "datetime64[s]"} and frequency == "microseconds" and n_periods == 0, @@ -1829,7 +1829,7 @@ def 
test_datetime_dateoffset_binaryop_reflected(n_periods, frequency, dtype): # TODO: Remove check_dtype once we get some clarity on: # https://github.com/pandas-dev/pandas/issues/57448 - utils.assert_eq(expect, got, check_dtype=not PANDAS_GE_220) + utils.assert_eq(expect, got, check_dtype=False) with pytest.raises(TypeError): poffset - psr diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 5942c89b9ef..2d728fb94ba 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -17,7 +17,7 @@ import cudf from cudf import read_csv -from cudf.core._compat import PANDAS_GE_200 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing._utils import assert_eq, assert_exceptions_equal @@ -344,6 +344,10 @@ def test_csv_reader_dtype_extremes(use_names): assert_eq(gdf, pdf) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/52449", +) def test_csv_reader_skiprows_skipfooter(tmpdir, pd_mixed_dataframe): fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file5.csv") @@ -372,12 +376,8 @@ def test_csv_reader_skiprows_skipfooter(tmpdir, pd_mixed_dataframe): assert len(out.columns) == len(df_out.columns) assert len(out) == len(df_out) - if PANDAS_GE_200: - # TODO: Remove typecast to `ns` after following - # issue is fixed: - # https://github.com/pandas-dev/pandas/issues/52449 - out["2"] = out["2"].astype("datetime64[ns]") - assert_eq(df_out, out) + + assert_eq(df_out, out, check_dtype=False) def test_csv_reader_negative_vals(tmpdir): diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index e6cf3988d23..a11873a1363 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -26,7 +26,7 @@ import cudf from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210 +from 
cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.errors import MixedTypeError @@ -1347,11 +1347,7 @@ def test_dataframe_setitem_from_masked_object(): def test_dataframe_append_to_empty(): pdf = pd.DataFrame() pdf["a"] = [] - if PANDAS_GE_200: - # TODO: Remove this workaround after - # the following bug is fixed: - # https://github.com/pandas-dev/pandas/issues/56679 - pdf["a"] = pdf["a"].astype("str") + pdf["a"] = pdf["a"].astype("str") pdf["b"] = [1, 2, 3] gdf = cudf.DataFrame() @@ -6724,7 +6720,8 @@ def test_dataframe_init_from_arrays_cols(data, cols, index): def test_dataframe_assign_scalar(request, col_data, assign_val): request.applymarker( pytest.mark.xfail( - condition=PANDAS_GE_200 and len(col_data) == 0, + condition=PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and len(col_data) == 0, reason="https://github.com/pandas-dev/pandas/issues/56679", ) ) @@ -9970,6 +9967,10 @@ def test_dataframe_rename_duplicate_column(): @pytest_unmark_spilling +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data", [ @@ -9990,8 +9991,7 @@ def test_dataframe_pct_change(data, periods, fill_method): with expect_warning_if(fill_method is not no_default): actual = gdf.pct_change(periods=periods, fill_method=fill_method) with expect_warning_if( - PANDAS_GE_210 - and (fill_method is not no_default or pdf.isna().any().any()) + fill_method is not no_default or pdf.isna().any().any() ): expected = pdf.pct_change(periods=periods, fill_method=fill_method) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index cceb6efaaae..7c209078fd2 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -13,7 +13,7 @@ import cudf import cudf.testing.dataset_generator 
as dataset_generator from cudf import DataFrame, Series -from cudf.core._compat import PANDAS_EQ_200, PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.index import DatetimeIndex from cudf.testing._utils import ( DATETIME_TYPES, @@ -1577,19 +1577,11 @@ def test_date_range_start_freq_periods(request, start, freq, periods): ) -def test_date_range_end_freq_periods(request, end, freq, periods): - request.applymarker( - pytest.mark.xfail( - condition=( - not PANDAS_GE_210 - and "nanoseconds" in freq - and periods != 1 - and end == "1970-01-01 00:00:00" - ), - reason="https://github.com/pandas-dev/pandas/issues/46877", - ) - ) - +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/46877", +) +def test_date_range_end_freq_periods(end, freq, periods): if isinstance(freq, str): _gfreq = _pfreq = freq else: @@ -1852,6 +1844,10 @@ def test_error_values(): s.values +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/52761", +) @pytest.mark.parametrize( "data", [ @@ -1873,22 +1869,7 @@ def test_error_values(): @pytest.mark.parametrize( "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] ) -def test_ceil(request, data, time_type, resolution): - alias_map = {"L": "ms", "U": "us", "N": "ns"} - request.applymarker( - pytest.mark.xfail( - condition=( - PANDAS_EQ_200 - and resolution in {"L", "ms", "U", "us", "N"} - and np.dtype( - f"datetime64[{alias_map.get(resolution, resolution)}]" - ) - > np.dtype(time_type) - ), - reason="https://github.com/pandas-dev/pandas/issues/52761", - strict=True, - ) - ) +def test_ceil(data, time_type, resolution): gs = cudf.Series(data, dtype=time_type) ps = gs.to_pandas() @@ -1897,6 +1878,10 @@ def test_ceil(request, data, time_type, resolution): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < 
PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/52761", +) @pytest.mark.parametrize( "data", [ @@ -1918,23 +1903,7 @@ def test_ceil(request, data, time_type, resolution): @pytest.mark.parametrize( "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] ) -def test_floor(request, data, time_type, resolution): - alias_map = {"L": "ms", "U": "us", "N": "ns"} - request.applymarker( - pytest.mark.xfail( - condition=( - PANDAS_EQ_200 - and resolution in {"L", "ms", "U", "us", "N"} - and np.dtype( - f"datetime64[{alias_map.get(resolution, resolution)}]" - ) - > np.dtype(time_type) - ), - reason="https://github.com/pandas-dev/pandas/issues/52761", - strict=True, - ) - ) - +def test_floor(data, time_type, resolution): gs = cudf.Series(data, dtype=time_type) ps = gs.to_pandas() @@ -1973,6 +1942,10 @@ def test_round(data, time_type, resolution): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "idx", [ @@ -2004,7 +1977,7 @@ def test_first(idx, offset): p = pd.Series(range(len(idx)), dtype="int64", index=idx) g = cudf.from_pandas(p) - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expect = p.first(offset=offset) with pytest.warns(FutureWarning): got = g.first(offset=offset) @@ -2036,7 +2009,7 @@ def test_first_start_at_end_of_month(idx, offset): p = pd.Series(range(len(idx)), index=idx) g = cudf.from_pandas(p) - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expect = p.first(offset=offset) with pytest.warns(FutureWarning): got = g.first(offset=offset) @@ -2044,6 +2017,10 @@ def test_first_start_at_end_of_month(idx, offset): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "idx", [ @@ -2075,7 +2052,7 @@ def 
test_last(idx, offset): p = pd.Series(range(len(idx)), dtype="int64", index=idx) g = cudf.from_pandas(p) - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expect = p.last(offset=offset) with pytest.warns(FutureWarning): got = g.last(offset=offset) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 8dbd74f4edf..befa9b467dd 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -20,7 +20,7 @@ import cudf from cudf import DataFrame, Series from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES from cudf.core.udf.utils import UDFError, precompiled @@ -1424,6 +1424,10 @@ def test_groupby_multi_agg_hash_groupby(agg): assert_groupby_results_equal(pdg, gdg, check_dtype=check_dtype) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="previous verion of pandas throws a warning", +) @pytest.mark.parametrize( "agg", ["min", "max", "idxmax", "idxmin", "sum", "prod", "count", "mean"] ) @@ -1463,12 +1467,12 @@ def test_groupby_nulls_basic(agg): # TODO: fillna() used here since we don't follow # Pandas' null semantics. Should we change it? 
- with expect_warning_if(agg in {"idxmax", "idxmin"} and not PANDAS_GE_220): - assert_groupby_results_equal( - getattr(pdf.groupby("a"), agg)().fillna(0), - getattr(gdf.groupby("a"), agg)().fillna(0 if agg != "prod" else 1), - check_dtype=check_dtype, - ) + + assert_groupby_results_equal( + getattr(pdf.groupby("a"), agg)().fillna(0), + getattr(gdf.groupby("a"), agg)().fillna(0 if agg != "prod" else 1), + check_dtype=check_dtype, + ) def test_groupby_nulls_in_index(): @@ -2850,6 +2854,10 @@ def test_groupby_various_by_fillna(by, data, args): assert_groupby_results_equal(expect, got, check_dtype=False) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize("nelem", [10, 100, 1000]) @pytest.mark.parametrize("method", ["ffill", "bfill"]) def test_groupby_fillna_method(nelem, method): @@ -2889,7 +2897,7 @@ def test_groupby_fillna_method(nelem, method): pdf = t.to_pandas() gdf = cudf.from_pandas(pdf) - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expect = pdf.groupby(key_col).fillna(method=method) with pytest.warns(FutureWarning): got = gdf.groupby(key_col).fillna(method=method) @@ -3235,6 +3243,10 @@ def test_groupby_transform_maintain_index(by): ) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data, gkey", [ @@ -3275,8 +3287,7 @@ def test_groupby_pct_change(data, gkey, periods, fill_method): periods=periods, fill_method=fill_method ) with expect_warning_if( - PANDAS_GE_210 - and ( + ( fill_method not in (no_default, None) or (fill_method is not None and pdf.isna().any().any()) ) @@ -3368,6 +3379,10 @@ def test_groupby_ngroup(by, ascending, df_ngroup): assert_eq(expected, actual, check_dtype=False) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas 
versions", +) @pytest.mark.parametrize( "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] ) @@ -3376,7 +3391,7 @@ def test_groupby_dtypes(groups): {"a": [1, 2, 3, 3], "b": ["x", "y", "z", "a"], "c": [10, 11, 12, 12]} ) pdf = df.to_pandas() - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expected = pdf.groupby(groups).dtypes with pytest.warns(FutureWarning): actual = df.groupby(groups).dtypes diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 0e6de3d3b4a..5f5c4579e01 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -9,7 +9,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import _utils as utils from cudf.testing._utils import ( INTEGER_TYPES, @@ -132,6 +132,10 @@ def test_series_indexing(i1, i2, i3): assert series[i] == a1[i] +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "arg", [ @@ -153,9 +157,10 @@ def test_series_get_item_iloc_defer(arg): ps = pd.Series([1, 2, 3], index=pd.Index(["a", "b", "c"])) gs = cudf.from_pandas(ps) - with expect_warning_if(PANDAS_GE_210 and not isinstance(arg, str)): + arg_not_str = not isinstance(arg, str) + with expect_warning_if(arg_not_str): expect = ps[arg] - with expect_warning_if(not isinstance(arg, str)): + with expect_warning_if(arg_not_str): got = gs[arg] assert_eq(expect, got) @@ -907,6 +912,10 @@ def test_dataframe_boolean_mask(mask_fn): assert pdf_masked.to_string().split() == gdf_masked.to_string().split() +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "key, value", [ @@ -931,10 +940,7 @@ def test_series_setitem_basics(key, value, nulls): psr[:] 
= None gsr = cudf.from_pandas(psr) with expect_warning_if( - PANDAS_GE_210 - and isinstance(value, list) - and len(value) == 0 - and nulls == "none" + isinstance(value, list) and len(value) == 0 and nulls == "none" ): psr[key] = value with expect_warning_if( @@ -960,6 +966,10 @@ def test_series_setitem_null(): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "key, value", [ @@ -984,10 +994,7 @@ def test_series_setitem_iloc(key, value, nulls): psr[:] = None gsr = cudf.from_pandas(psr) with expect_warning_if( - PANDAS_GE_210 - and isinstance(value, list) - and len(value) == 0 - and nulls == "none" + isinstance(value, list) and len(value) == 0 and nulls == "none" ): psr.iloc[key] = value with expect_warning_if( diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py index 5ad542546aa..a0e90cc89a2 100644 --- a/python/cudf/cudf/tests/test_interpolate.py +++ b/python/cudf/cudf/tests/test_interpolate.py @@ -3,7 +3,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing._utils import ( assert_eq, assert_exceptions_equal, @@ -35,6 +35,10 @@ def test_interpolate_dataframe(data, method, axis): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data", [ @@ -54,9 +58,10 @@ def test_interpolate_series(data, method, axis): gsr = cudf.Series(data) psr = gsr.to_pandas() - with expect_warning_if(PANDAS_GE_210 and psr.dtype == "object"): + is_str_dtype = psr.dtype == "object" + with expect_warning_if(is_str_dtype): expect = psr.interpolate(method=method, axis=axis) - with expect_warning_if(gsr.dtype == "object"): + with expect_warning_if(is_str_dtype): got = 
gsr.interpolate(method=method, axis=axis) assert_eq(expect, got, check_dtype=psr.dtype != "object") @@ -75,6 +80,10 @@ def test_interpolate_series_unsorted_index(data, index): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data", [ @@ -94,9 +103,10 @@ def test_interpolate_series_values_or_index(data, index, method): gsr = cudf.Series(data, index=index) psr = gsr.to_pandas() - with expect_warning_if(PANDAS_GE_210 and gsr.dtype == "object"): + is_str_dtype = gsr.dtype == "object" + with expect_warning_if(is_str_dtype): expect = psr.interpolate(method=method) - with expect_warning_if(gsr.dtype == "object"): + with expect_warning_if(is_str_dtype): got = gsr.interpolate(method=method) assert_eq(expect, got, check_dtype=psr.dtype != "object") diff --git a/python/cudf/cudf/tests/test_join_order.py b/python/cudf/cudf/tests/test_join_order.py index 7031a43d7f5..8d71a6c05b8 100644 --- a/python/cudf/cudf/tests/test_join_order.py +++ b/python/cudf/cudf/tests/test_join_order.py @@ -7,7 +7,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing._utils import assert_eq @@ -45,7 +45,8 @@ def expected(left, right, sort, *, how): def test_join_ordering_pandas_compat(request, left, right, sort, how): request.applymarker( pytest.mark.xfail( - PANDAS_GE_220 and how == "right", + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and how == "right", reason="TODO: Result ording of suffix'ed columns is incorrect", ) ) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index 302051ade05..c063043b72a 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -7,7 +7,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_220 
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import ( INTEGER_TYPES, @@ -157,6 +157,10 @@ def _check_series(expect, got): assert direct_equal or nanfilled_equal, msg +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="bug in older version of pandas", +) def test_dataframe_join_suffix(): np.random.seed(0) @@ -175,7 +179,7 @@ def test_dataframe_join_suffix(): right.to_pandas(), lsuffix="_left", rsuffix="_right", - sort=PANDAS_GE_220, + sort=True, ) # TODO: Retain result index name expect.index.name = None @@ -1931,10 +1935,7 @@ def test_string_join_key(str_data, num_keys, how): gdf[i] = cudf.Series(str_data, dtype="str") pdf["a"] = other_data gdf["a"] = other_data - if PANDAS_GE_200 and len(other_data) == 0: - # TODO: Remove this workaround after - # the following bug is fixed: - # https://github.com/pandas-dev/pandas/issues/56679 + if len(other_data) == 0: pdf["a"] = pdf["a"].astype("str") pdf2 = pdf.copy() gdf2 = gdf.copy() @@ -2011,10 +2012,7 @@ def test_string_join_non_key(str_data, num_cols, how): gdf[i] = cudf.Series(str_data, dtype="str") pdf["a"] = other_data gdf["a"] = other_data - if PANDAS_GE_200 and len(other_data) == 0: - # TODO: Remove this workaround after - # the following bug is fixed: - # https://github.com/pandas-dev/pandas/issues/56679 + if len(other_data) == 0: pdf["a"] = pdf["a"].astype("str") pdf2 = pdf.copy() diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 45f9980ebd6..40935733f34 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -13,7 +13,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -336,18 
+336,17 @@ def json_input(request, tmp_path_factory): return Path(fname).as_uri() +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.filterwarnings("ignore:Using CPU") @pytest.mark.parametrize("engine", ["auto", "cudf", "pandas"]) def test_json_lines_basic(json_input, engine): - with expect_warning_if( - isinstance(json_input, str) and not json_input.endswith(".json") - ): + can_warn = isinstance(json_input, str) and not json_input.endswith(".json") + with expect_warning_if(can_warn): cu_df = cudf.read_json(json_input, engine=engine, lines=True) - with expect_warning_if( - isinstance(json_input, str) - and PANDAS_GE_210 - and not json_input.endswith(".json") - ): + with expect_warning_if(can_warn): pd_df = pd.read_json(json_input, lines=True) assert all(cu_df.dtypes == ["int64", "int64", "int64"]) @@ -356,6 +355,10 @@ def test_json_lines_basic(json_input, engine): np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.filterwarnings("ignore:Using CPU") @pytest.mark.parametrize("engine", ["auto", "cudf"]) def test_json_lines_multiple(tmpdir, json_input, engine): @@ -363,9 +366,7 @@ def test_json_lines_multiple(tmpdir, json_input, engine): tmp_file2 = tmpdir.join("MultiInputs2.json") with expect_warning_if( - isinstance(json_input, str) - and PANDAS_GE_210 - and not json_input.endswith(".json") + isinstance(json_input, str) and not json_input.endswith(".json") ): pdf = pd.read_json(json_input, lines=True) pdf.to_json(tmp_file1, compression="infer", lines=True, orient="records") @@ -380,12 +381,14 @@ def test_json_lines_multiple(tmpdir, json_input, engine): np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + 
reason="warning not present in older pandas versions", +) @pytest.mark.parametrize("engine", ["auto", "cudf"]) def test_json_read_directory(tmpdir, json_input, engine): with expect_warning_if( - isinstance(json_input, str) - and PANDAS_GE_210 - and not json_input.endswith(".json") + isinstance(json_input, str) and not json_input.endswith(".json") ): pdf = pd.read_json(json_input, lines=True) pdf.to_json( @@ -1175,12 +1178,12 @@ def test_chunked_nested_json_reader(self, tag, data, chunk_size): df = cudf.concat(chunks, ignore_index=True) assert expected.to_arrow().equals(df.to_arrow()) + @pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/pull/57439", + ) def test_order_nested_json_reader(self, tag, data): expected = pd.read_json(StringIO(data), lines=True) - if PANDAS_GE_220: - # TODO: Remove after https://github.com/pandas-dev/pandas/issues/57429 - # is fixed - expected = expected.reset_index(drop=True) target = cudf.read_json(StringIO(data), lines=True) # Using pyarrow instead of assert_eq because pandas # doesn't handle nested values comparisons correctly diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 80fc815dd76..69ddd936eee 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -13,7 +13,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.io.orc import ORCWriter from cudf.testing import assert_frame_equal from cudf.testing._utils import ( @@ -129,23 +129,16 @@ def test_orc_reader_filepath_or_buffer(path_or_buf, src): assert_eq(expect, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Bug in older version of pandas", +) def test_orc_reader_trailing_nulls(datadir): path = datadir / "TestOrcFile.nulls-at-end-snappy.orc" expect = pd.read_orc(path) got = 
cudf.read_orc(path) - if PANDAS_GE_220: - check_categorical = True - else: - check_categorical = False - expect = expect.fillna(0) - got = got.fillna(0) - - # PANDAS uses NaN to represent invalid data, which forces float dtype - # For comparison, we can replace NaN with 0 and cast to the cuDF dtype - for col in expect.columns: - expect[col] = expect[col].astype(got[col].dtype) - assert_eq(expect, got, check_categorical=check_categorical) + assert_eq(expect, got, check_categorical=True) @pytest.mark.parametrize("use_index", [False, True]) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 0d83bd7ebe8..ab2b03d7302 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -21,7 +21,6 @@ from pyarrow import fs as pa_fs, parquet as pq import cudf -from cudf.core._compat import PANDAS_GE_200 from cudf.io.parquet import ( ParquetDatasetWriter, ParquetWriter, @@ -1607,18 +1606,9 @@ def test_parquet_writer_int96_timestamps(tmpdir, pdf, gdf): expect = pdf got = pd.read_parquet(gdf_fname) - if PANDAS_GE_200: - # https://github.com/pandas-dev/pandas/issues/52412 - assert got["col_datetime64[ms]"].dtype == np.dtype("datetime64[ns]") - assert got["col_datetime64[us]"].dtype == np.dtype("datetime64[ns]") - got["col_datetime64[ms]"] = got["col_datetime64[ms]"].astype( - "datetime64[ms]" - ) - got["col_datetime64[us]"] = got["col_datetime64[us]"].astype( - "datetime64[us]" - ) + # verify INT96 timestamps were converted back to the same data. 
- assert_eq(expect, got, check_categorical=False) + assert_eq(expect, got, check_categorical=False, check_dtype=False) def test_multifile_parquet_folder(tmpdir): @@ -1906,7 +1896,7 @@ def test_parquet_partitioned(tmpdir_factory, cols, filename): # Check that cudf and pd return the same read got_cudf = cudf.read_parquet(gdf_dir) - if PANDAS_GE_200 and isinstance(got_pd["c"].dtype, pd.CategoricalDtype): + if isinstance(got_pd["c"].dtype, pd.CategoricalDtype): # Work-around for pandas bug: # https://github.com/pandas-dev/pandas/issues/53345 got_pd["c"] = got_pd["c"].astype( @@ -1962,15 +1952,15 @@ def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta): # Check that cudf and pd return the same read got_cudf = cudf.read_parquet(gdf_dir) - if PANDAS_GE_200: - # Work-around for pandas bug: - # https://github.com/pandas-dev/pandas/issues/53345 - got_pd["a"] = got_pd["a"].astype( - pd.CategoricalDtype( - categories=got_pd["a"].dtype.categories.astype("int64"), - ordered=got_pd["a"].dtype.ordered, - ) + + # Work-around for pandas bug: + # https://github.com/pandas-dev/pandas/issues/53345 + got_pd["a"] = got_pd["a"].astype( + pd.CategoricalDtype( + categories=got_pd["a"].dtype.categories.astype("int64"), + ordered=got_pd["a"].dtype.ordered, ) + ) assert_eq(got_pd, got_cudf) @@ -2011,15 +2001,15 @@ def test_parquet_writer_chunked_max_file_size( # Check that cudf and pd return the same read got_cudf = cudf.read_parquet(gdf_dir) - if PANDAS_GE_200: - # Work-around for pandas bug: - # https://github.com/pandas-dev/pandas/issues/53345 - got_pd["a"] = got_pd["a"].astype( - pd.CategoricalDtype( - categories=got_pd["a"].dtype.categories.astype("int64"), - ordered=got_pd["a"].dtype.ordered, - ) + + # Work-around for pandas bug: + # https://github.com/pandas-dev/pandas/issues/53345 + got_pd["a"] = got_pd["a"].astype( + pd.CategoricalDtype( + categories=got_pd["a"].dtype.categories.astype("int64"), + ordered=got_pd["a"].dtype.ordered, ) + ) assert_eq( 
got_pd.sort_values(["b"]).reset_index(drop=True), got_cudf.sort_values(["b"]).reset_index(drop=True), @@ -2065,15 +2055,15 @@ def test_parquet_writer_chunked_partitioned_context(tmpdir_factory): # Check that cudf and pd return the same read got_cudf = cudf.read_parquet(gdf_dir) - if PANDAS_GE_200: - # Work-around for pandas bug: - # https://github.com/pandas-dev/pandas/issues/53345 - got_pd["a"] = got_pd["a"].astype( - pd.CategoricalDtype( - categories=got_pd["a"].dtype.categories.astype("int64"), - ordered=got_pd["a"].dtype.ordered, - ) + + # Work-around for pandas bug: + # https://github.com/pandas-dev/pandas/issues/53345 + got_pd["a"] = got_pd["a"].astype( + pd.CategoricalDtype( + categories=got_pd["a"].dtype.categories.astype("int64"), + ordered=got_pd["a"].dtype.ordered, ) + ) assert_eq(got_pd, got_cudf) @@ -2181,15 +2171,15 @@ def test_read_parquet_partitioned_filtered( filters = [[("a", "==", 10)], [("c", "==", 1)]] got = cudf.read_parquet(read_path, filters=filters) expect = pd.read_parquet(read_path, filters=filters) - if PANDAS_GE_200: - # Work-around for pandas bug: - # https://github.com/pandas-dev/pandas/issues/53345 - expect["c"] = expect["c"].astype( - pd.CategoricalDtype( - categories=expect["c"].dtype.categories.astype("int64"), - ordered=expect["c"].dtype.ordered, - ) + + # Work-around for pandas bug: + # https://github.com/pandas-dev/pandas/issues/53345 + expect["c"] = expect["c"].astype( + pd.CategoricalDtype( + categories=expect["c"].dtype.categories.astype("int64"), + ordered=expect["c"].dtype.ordered, ) + ) assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index c667211b6d8..8992c4d617b 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION 
from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing._utils import ( INTEGER_TYPES, @@ -167,6 +167,10 @@ def test_series_replace_with_nulls(): assert_eq(a9, sr9.to_numpy()) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning introduced in pandas-2.2.0", +) @pytest.mark.parametrize( "df", [ @@ -246,25 +250,19 @@ def test_dataframe_replace(df, to_replace, value): else: gd_to_replace = to_replace - with expect_warning_if( - PANDAS_GE_220 - and isinstance(df["a"].dtype, cudf.CategoricalDtype) + can_warn = ( + isinstance(df["a"].dtype, cudf.CategoricalDtype) and isinstance(to_replace, str) and to_replace == "two" and isinstance(value, str) and value == "three" - ): + ) + with expect_warning_if(can_warn): if pd_value is None: expected = pdf.replace(to_replace=pd_to_replace) else: expected = pdf.replace(to_replace=pd_to_replace, value=pd_value) - with expect_warning_if( - isinstance(df["a"].dtype, cudf.CategoricalDtype) - and isinstance(to_replace, str) - and to_replace == "two" - and isinstance(value, str) - and value == "three" - ): + with expect_warning_if(can_warn): actual = gdf.replace(to_replace=gd_to_replace, value=gd_value) expected_sorted = expected.sort_values(by=list(expected.columns), axis=0) @@ -339,6 +337,10 @@ def test_series_fillna_numerical(psr, data_dtype, fill_value, inplace): assert_eq(expected, actual, check_dtype=False) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data", [ @@ -368,7 +370,7 @@ def test_fillna_method_numerical(data, container, data_dtype, method, inplace): # Explicitly using nans_as_nulls=True gdata = cudf.from_pandas(pdata, nan_as_null=True) - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expected = pdata.fillna(method=method, inplace=inplace) with pytest.warns(FutureWarning): actual = 
gdata.fillna(method=method, inplace=inplace) @@ -620,6 +622,10 @@ def test_fillna_datetime(psr_data, fill_value, inplace): assert_eq(expected, got) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data", [ @@ -699,7 +705,7 @@ def test_fillna_method_fixed_width_non_num(data, container, method, inplace): # Explicitly using nans_as_nulls=True gdata = cudf.from_pandas(pdata, nan_as_null=True) - with expect_warning_if(PANDAS_GE_210): + with pytest.warns(FutureWarning): expected = pdata.fillna(method=method, inplace=inplace) with pytest.warns(FutureWarning): actual = gdata.fillna(method=method, inplace=inplace) @@ -1042,7 +1048,7 @@ def test_numeric_series_replace_dtype(series_dtype, replacement): pd.Series(["one", "two", "three"], dtype="category"), {"to_replace": "one", "value": "two", "inplace": True}, marks=pytest.mark.xfail( - condition=PANDAS_GE_200, + condition=PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, reason="https://github.com/pandas-dev/pandas/issues/43232" "https://github.com/pandas-dev/pandas/issues/53358", ), diff --git a/python/cudf/cudf/tests/test_resampling.py b/python/cudf/cudf/tests/test_resampling.py index a7e04e3fa13..ad6e0ac52c5 100644 --- a/python/cudf/cudf/tests/test_resampling.py +++ b/python/cudf/cudf/tests/test_resampling.py @@ -5,7 +5,6 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_220 from cudf.testing._utils import assert_eq @@ -31,7 +30,7 @@ def test_series_downsample_simple(ts_resolution): assert_resample_results_equal( psr.resample("3min").sum(), gsr.resample("3min").sum(), - check_index=not PANDAS_GE_220, + check_index=False, ) @@ -44,7 +43,7 @@ def test_series_upsample_simple(): assert_resample_results_equal( psr.resample("3min").sum(), gsr.resample("3min").sum(), - check_index=not PANDAS_GE_220, + check_index=False, ) diff --git a/python/cudf/cudf/tests/test_reshape.py 
b/python/cudf/cudf/tests/test_reshape.py index e632078e0d9..d618669755d 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -9,7 +9,7 @@ import cudf from cudf import melt as cudf_melt -from cudf.core._compat import PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing._utils import ( ALL_TYPES, @@ -155,7 +155,7 @@ def test_df_stack_reset_index(): @pytest.mark.skipif( - not PANDAS_GE_210, + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Need pandas-2.1.0+ to match `stack` api", ) @pytest.mark.parametrize( @@ -241,7 +241,7 @@ def test_df_stack_mixed_dtypes(): @pytest.mark.skipif( - not PANDAS_GE_210, + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Need pandas-2.1.0+ to match `stack` api", ) @pytest.mark.parametrize("level", [["animal", "hair_length"], [1, 2]]) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index fdf9357cb5d..d110f8d8932 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -15,7 +15,7 @@ import cudf from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.errors import MixedTypeError from cudf.testing._utils import ( NUMERIC_TYPES, @@ -1748,6 +1748,10 @@ def test_fill_new_category(): gs[0:1] = "d" +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning newly introduced in pandas-2.2.0", +) @pytest.mark.parametrize( "data", [ @@ -1799,7 +1803,7 @@ def test_isin_datetime(data, values): is_len_str = isinstance(next(iter(values), None), str) and len(data) with expect_warning_if(is_len_str): got = gsr.isin(values) - with expect_warning_if(PANDAS_GE_220 and is_len_str): + with expect_warning_if(is_len_str): expected = 
psr.isin(values) assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 967c1d27fc1..ff2f7bd41f2 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing._utils import ( assert_eq, assert_exceptions_equal, @@ -282,6 +282,10 @@ def test_series_slice_setitem_struct(): assert_eq(actual, expected) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) @pytest.mark.parametrize("indices", [0, [1, 2]]) def test_series_setitem_upcasting(dtype, indices): @@ -293,7 +297,7 @@ def test_series_setitem_upcasting(dtype, indices): # column dtype. new_value = np.float64(np.pi) col_ref = cr._column - with expect_warning_if(PANDAS_GE_210 and dtype != np.float64): + with expect_warning_if(dtype != np.float64): sr[indices] = new_value with expect_warning_if(dtype != np.float64): cr[indices] = new_value diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index f9ca0e8ebcb..618c4f30bd9 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -8,7 +8,7 @@ import pytest from cudf import DataFrame, Series -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.column import NumericalColumn from cudf.testing._utils import ( DATETIME_TYPES, @@ -49,7 +49,11 @@ def test_dataframe_sort_values(nelem, dtype): @pytest.mark.parametrize("ignore_index", [True, False]) @pytest.mark.parametrize("index", ["a", "b", ["a", "b"]]) def test_dataframe_sort_values_ignore_index(index, ignore_index): - 
if PANDAS_GE_220 and isinstance(index, list) and not ignore_index: + if ( + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and isinstance(index, list) + and not ignore_index + ): pytest.skip( reason="Unstable sorting by pandas(numpy): https://github.com/pandas-dev/pandas/issues/57531" ) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 9d5f0cd5eab..b9eb42906e8 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -9,7 +9,7 @@ import cudf from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_GE_210 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.datasets import randomdata from cudf.testing._utils import ( assert_eq, @@ -340,6 +340,10 @@ def test_series_median(dtype, num_na): np.testing.assert_approx_equal(actual, desired) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) @pytest.mark.parametrize( "data", [ @@ -364,8 +368,7 @@ def test_series_pct_change(data, periods, fill_method): with expect_warning_if(fill_method not in (no_default, None)): got = cs.pct_change(periods=periods, fill_method=fill_method) with expect_warning_if( - PANDAS_GE_210 - and ( + ( fill_method not in (no_default, None) or (fill_method is not None and ps.isna().any()) ) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index ed3461578fd..4843decedba 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -7,7 +7,7 @@ from numba import cuda import cudf -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.missing import NA from cudf.core.udf._ops import ( arith_ops, @@ -484,7 +484,8 @@ def func(x): @pytest.mark.xfail( - PANDAS_GE_220, 
reason="https://github.com/pandas-dev/pandas/issues/57390" + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/57390", ) def test_series_apply_null_conditional(): def func(x): @@ -511,7 +512,8 @@ def func(x): @pytest.mark.xfail( - PANDAS_GE_220, reason="https://github.com/pandas-dev/pandas/issues/57390" + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/57390", ) @pytest.mark.parametrize("op", comparison_ops) def test_series_compare_masked_vs_masked(op): @@ -570,7 +572,8 @@ def func(x): @pytest.mark.xfail( - PANDAS_GE_220, reason="https://github.com/pandas-dev/pandas/issues/57390" + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/57390", ) def test_series_masked_is_null_conditional(): def func(x): @@ -756,7 +759,9 @@ def test_masked_udf_scalar_args_binops_multiple_series(request, data, op): data = cudf.Series(data) request.applymarker( pytest.mark.xfail( - op in comparison_ops and PANDAS_GE_220 and data.dtype.kind != "b", + op in comparison_ops + and PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and data.dtype.kind != "b", reason="https://github.com/pandas-dev/pandas/issues/57390", ) )