Skip to content

Commit

Permalink
Change cross-pandas-version testing in cudf (#15145)
Browse files Browse the repository at this point in the history
This PR removes redundant version checks in a lot of pytests.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Bradley Dice (https://github.com/bdice)

URL: #15145
  • Loading branch information
galipremsagar authored Mar 13, 2024
1 parent 39a365b commit fe9642b
Show file tree
Hide file tree
Showing 26 changed files with 310 additions and 245 deletions.
19 changes: 19 additions & 0 deletions docs/cudf/source/developer_guide/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,22 @@ In particular:
- `testing._utils.assert_eq` is the biggest hammer to reach for. It can be used to compare any pair of objects.
- For comparing specific objects, use `testing.testing.assert_[frame|series|index]_equal`.
- For verifying that the expected assertions are raised, use `testing._utils.assert_exceptions_equal`.


### Version testing

It is recommended to have `cudf` pytests only work on the latest supported pandas version i.e., `PANDAS_CURRENT_SUPPORTED_VERSION`. Any anticipated failures should be either `skipped` or `xfailed`.

For example:

```python
@pytest.mark.skipif(PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="bug in older version of pandas")
def test_bug_from_older_pandas_versions(...):
...

@pytest.mark.xfail(PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, reason="bug in latest version of pandas")
def test_bug_in_current_and_maybe_future_versions(...):
...
```

If pandas makes a bugfix release and fixes this, then we'll see it in CI immediately, patch it, and bump `PANDAS_CURRENT_SUPPORTED_VERSION` which also usually happens during pandas upgrades.
8 changes: 3 additions & 5 deletions python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
import pandas as pd
from packaging import version

PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.1")
PANDAS_VERSION = version.parse(pd.__version__)
PANDAS_EQ_200 = PANDAS_VERSION == version.parse("2.0.0")
PANDAS_GE_200 = PANDAS_VERSION >= version.parse("2.0.0")
PANDAS_GE_201 = PANDAS_VERSION >= version.parse("2.0.1")


PANDAS_GE_210 = PANDAS_VERSION >= version.parse("2.1.0")
PANDAS_GE_214 = PANDAS_VERSION >= version.parse("2.1.4")
PANDAS_LT_203 = PANDAS_VERSION < version.parse("2.0.3")
PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
PANDAS_LT_300 = PANDAS_VERSION < version.parse("3.0.0")
8 changes: 4 additions & 4 deletions python/cudf/cudf/tests/indexes/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_210
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.core.index import IntervalIndex, interval_range
from cudf.testing._utils import assert_eq

Expand Down Expand Up @@ -315,8 +315,8 @@ def test_interval_index_from_breaks(closed):
1.0,
0.2,
None,
marks=pytest.mark.xfail(
condition=not PANDAS_GE_210,
marks=pytest.mark.skipif(
condition=PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="https://github.com/pandas-dev/pandas/pull/54477",
),
),
Expand All @@ -327,7 +327,7 @@ def test_interval_index_from_breaks(closed):
0.1,
None,
marks=pytest.mark.xfail(
condition=not PANDAS_GE_210,
condition=PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="https://github.com/pandas-dev/pandas/pull/54477",
),
),
Expand Down
29 changes: 22 additions & 7 deletions python/cudf/cudf/tests/test_api_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@

import cudf
from cudf.api import types
from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_214, PANDAS_GE_220
from cudf.testing._utils import expect_warning_if
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION


@pytest.mark.parametrize(
Expand Down Expand Up @@ -499,8 +498,22 @@ def test_is_integer(obj, expect):
(pd.Series(dtype="int"), False),
(pd.Series(dtype="float"), False),
(pd.Series(dtype="complex"), False),
(pd.Series(dtype="str"), PANDAS_GE_220),
(pd.Series(dtype="unicode"), PANDAS_GE_220),
pytest.param(
pd.Series(dtype="str"),
True,
marks=pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="bug in previous pandas versions",
),
),
pytest.param(
pd.Series(dtype="unicode"),
True,
marks=pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="bug in previous pandas versions",
),
),
(pd.Series(dtype="datetime64[s]"), False),
(pd.Series(dtype="timedelta64[s]"), False),
(pd.Series(dtype="category"), False),
Expand Down Expand Up @@ -964,6 +977,10 @@ def test_is_decimal_dtype(obj, expect):
assert types.is_decimal_dtype(obj) == expect


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="inconsistent warnings in older pandas versions",
)
@pytest.mark.parametrize(
"obj",
(
Expand Down Expand Up @@ -1037,9 +1054,7 @@ def test_is_decimal_dtype(obj, expect):
),
)
def test_pandas_agreement(obj):
with expect_warning_if(
PANDAS_GE_210, DeprecationWarning if PANDAS_GE_214 else FutureWarning
):
with pytest.warns(DeprecationWarning):
expected = pd_types.is_categorical_dtype(obj)
with pytest.warns(DeprecationWarning):
actual = types.is_categorical_dtype(obj)
Expand Down
10 changes: 7 additions & 3 deletions python/cudf/cudf/tests/test_applymap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
import pytest

from cudf import NA, DataFrame
from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.testing import _utils as utils


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="warning not present in all versions of pandas",
)
@pytest.mark.parametrize(
"data",
[
Expand All @@ -29,15 +33,15 @@
def test_applymap_dataframe(data, func, na_action, request):
request.applymarker(
pytest.mark.xfail(
PANDAS_GE_220
PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION
and request.node.callspec.id == "None-<lambda>2-data3",
reason="https://github.com/pandas-dev/pandas/issues/57390",
)
)
gdf = DataFrame(data)
pdf = gdf.to_pandas(nullable=True)

with utils.expect_warning_if(PANDAS_GE_210):
with pytest.warns(FutureWarning):
expect = pdf.applymap(func, na_action=na_action)
with pytest.warns(FutureWarning):
got = gdf.applymap(func, na_action=na_action)
Expand Down
20 changes: 15 additions & 5 deletions python/cudf/cudf/tests/test_array_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_210, PANDAS_LT_300
from cudf.core._compat import (
PANDAS_CURRENT_SUPPORTED_VERSION,
PANDAS_LT_300,
PANDAS_VERSION,
)
from cudf.testing._utils import (
assert_eq,
expect_warning_if,
Expand Down Expand Up @@ -143,6 +147,10 @@ def test_binary_ufunc_index_array(ufunc, reflect):
assert_eq(got, expect, check_exact=False)


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="warning not present in older pandas versions",
)
@pytest.mark.parametrize("ufunc", _UFUNCS)
@pytest.mark.parametrize("has_nulls", [True, False])
@pytest.mark.parametrize("indexed", [True, False])
Expand Down Expand Up @@ -231,8 +239,7 @@ def test_ufunc_series(request, ufunc, has_nulls, indexed):
else:
if has_nulls:
with expect_warning_if(
PANDAS_GE_210
and fname
fname
in (
"isfinite",
"isinf",
Expand Down Expand Up @@ -351,6 +358,10 @@ def test_ufunc_cudf_series_error_with_out_kwarg(func):


# Skip matmul since it requires aligned shapes.
@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="warning not present in older pandas versions",
)
@pytest.mark.parametrize("ufunc", (uf for uf in _UFUNCS if uf != np.matmul))
@pytest.mark.parametrize("has_nulls", [True, False])
@pytest.mark.parametrize("indexed", [True, False])
Expand Down Expand Up @@ -431,8 +442,7 @@ def test_ufunc_dataframe(request, ufunc, has_nulls, indexed):
else:
if has_nulls:
with expect_warning_if(
PANDAS_GE_210
and fname
fname
in (
"isfinite",
"isinf",
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import cudf
from cudf import Series
from cudf.core._compat import PANDAS_GE_220
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.core.index import as_index
from cudf.testing import _utils as utils
Expand Down Expand Up @@ -829,7 +829,7 @@ def test_operator_func_series_and_scalar_logical(
):
request.applymarker(
pytest.mark.xfail(
PANDAS_GE_220
PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION
and fill_value == 1.0
and scalar is np.nan
and (has_nulls or (not has_nulls and func not in {"eq", "ne"})),
Expand Down Expand Up @@ -1719,7 +1719,7 @@ def test_datetime_dateoffset_binaryop(
):
request.applymarker(
pytest.mark.xfail(
PANDAS_GE_220
PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION
and dtype in {"datetime64[ms]", "datetime64[s]"}
and frequency == "microseconds"
and n_periods == 0,
Expand Down Expand Up @@ -1829,7 +1829,7 @@ def test_datetime_dateoffset_binaryop_reflected(n_periods, frequency, dtype):

# TODO: Remove check_dtype once we get some clarity on:
# https://github.com/pandas-dev/pandas/issues/57448
utils.assert_eq(expect, got, check_dtype=not PANDAS_GE_220)
utils.assert_eq(expect, got, check_dtype=False)

with pytest.raises(TypeError):
poffset - psr
Expand Down
14 changes: 7 additions & 7 deletions python/cudf/cudf/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import cudf
from cudf import read_csv
from cudf.core._compat import PANDAS_GE_200
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.testing._utils import assert_eq, assert_exceptions_equal


Expand Down Expand Up @@ -344,6 +344,10 @@ def test_csv_reader_dtype_extremes(use_names):
assert_eq(gdf, pdf)


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="https://github.com/pandas-dev/pandas/issues/52449",
)
def test_csv_reader_skiprows_skipfooter(tmpdir, pd_mixed_dataframe):
fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file5.csv")

Expand Down Expand Up @@ -372,12 +376,8 @@ def test_csv_reader_skiprows_skipfooter(tmpdir, pd_mixed_dataframe):

assert len(out.columns) == len(df_out.columns)
assert len(out) == len(df_out)
if PANDAS_GE_200:
# TODO: Remove typecast to `ns` after following
# issue is fixed:
# https://github.com/pandas-dev/pandas/issues/52449
out["2"] = out["2"].astype("datetime64[ns]")
assert_eq(df_out, out)

assert_eq(df_out, out, check_dtype=False)


def test_csv_reader_negative_vals(tmpdir):
Expand Down
18 changes: 9 additions & 9 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

import cudf
from cudf.api.extensions import no_default
from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.core.column import column
from cudf.errors import MixedTypeError
Expand Down Expand Up @@ -1347,11 +1347,7 @@ def test_dataframe_setitem_from_masked_object():
def test_dataframe_append_to_empty():
pdf = pd.DataFrame()
pdf["a"] = []
if PANDAS_GE_200:
# TODO: Remove this workaround after
# the following bug is fixed:
# https://github.com/pandas-dev/pandas/issues/56679
pdf["a"] = pdf["a"].astype("str")
pdf["a"] = pdf["a"].astype("str")
pdf["b"] = [1, 2, 3]

gdf = cudf.DataFrame()
Expand Down Expand Up @@ -6724,7 +6720,8 @@ def test_dataframe_init_from_arrays_cols(data, cols, index):
def test_dataframe_assign_scalar(request, col_data, assign_val):
request.applymarker(
pytest.mark.xfail(
condition=PANDAS_GE_200 and len(col_data) == 0,
condition=PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION
and len(col_data) == 0,
reason="https://github.com/pandas-dev/pandas/issues/56679",
)
)
Expand Down Expand Up @@ -9970,6 +9967,10 @@ def test_dataframe_rename_duplicate_column():


@pytest_unmark_spilling
@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="warning not present in older pandas versions",
)
@pytest.mark.parametrize(
"data",
[
Expand All @@ -9990,8 +9991,7 @@ def test_dataframe_pct_change(data, periods, fill_method):
with expect_warning_if(fill_method is not no_default):
actual = gdf.pct_change(periods=periods, fill_method=fill_method)
with expect_warning_if(
PANDAS_GE_210
and (fill_method is not no_default or pdf.isna().any().any())
fill_method is not no_default or pdf.isna().any().any()
):
expected = pdf.pct_change(periods=periods, fill_method=fill_method)

Expand Down
Loading

0 comments on commit fe9642b

Please sign in to comment.