From 097556cfe114fc7980368001938415057ed72d9d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 17:11:49 -0800 Subject: [PATCH 1/3] Add xfail for test_operator_func_series_and_scalar_logical --- python/cudf/cudf/tests/test_binops.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 3ebefa6e071..b3192b50e51 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -13,6 +13,7 @@ import cudf from cudf import Series +from cudf.core._compat import PANDAS_GE_220 from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.index import as_index from cudf.testing import _utils as utils @@ -824,11 +825,21 @@ def test_operator_func_between_series_logical( @pytest.mark.parametrize("fill_value", [None, 1.0]) @pytest.mark.parametrize("use_cudf_scalar", [False, True]) def test_operator_func_series_and_scalar_logical( - dtype, func, has_nulls, scalar, fill_value, use_cudf_scalar + request, dtype, func, has_nulls, scalar, fill_value, use_cudf_scalar ): - gdf_series = utils.gen_rand_series( - dtype, 1000, has_nulls=has_nulls, stride=10000 + request.applymarker( + pytest.mark.xfail( + PANDAS_GE_220 + and fill_value == 1.0 + and scalar is np.nan + and (has_nulls or (not has_nulls and func not in {"eq", "ne"})), + reason="https://github.com/pandas-dev/pandas/issues/57447", + ) ) + if has_nulls: + gdf_series = cudf.Series([-1.0, 0, cudf.NA, 1.1], dtype=dtype) + else: + gdf_series = cudf.Series([-1.0, 0, 10.5, 1.1], dtype=dtype) pdf_series = gdf_series.to_pandas(nullable=True) gdf_series_result = getattr(gdf_series, func)( cudf.Scalar(scalar) if use_cudf_scalar else scalar, From 4ea1f93b6746ea5553dadd606cdc17c04d35021b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Feb 2024 13:00:15 -0800 Subject: [PATCH 2/3] Fix binops with datetime and dateoffset --- python/cudf/cudf/core/column/datetime.py | 4 +--- python/cudf/cudf/tests/test_binops.py | 26 ++++++++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 7df22c7d8ea..b2f14b86ed9 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -567,9 +567,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: if other is NotImplemented: return NotImplemented if isinstance(other, cudf.DateOffset): - return other._datetime_binop(self, op, reflect=reflect).astype( - self.dtype - ) + return other._datetime_binop(self, op, reflect=reflect) # We check this on `other` before reflection since we already know the # dtype of `self`. diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index b3192b50e51..bc92303f9b6 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1695,16 +1695,6 @@ def test_scalar_null_binops(op, dtype_l, dtype_r): assert result.dtype == valid_result.dtype -@pytest.mark.parametrize( - "date_col", - [ - [ - "2000-01-01 00:00:00.012345678", - "2000-01-31 00:00:00.012345678", - "2000-02-29 00:00:00.012345678", - ] - ], -) @pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12]) @pytest.mark.parametrize( "frequency", @@ -1725,8 +1715,22 @@ def test_scalar_null_binops(op, dtype_l, dtype_r): ) @pytest.mark.parametrize("op", [operator.add, operator.sub]) def test_datetime_dateoffset_binaryop( - date_col, n_periods, frequency, dtype, op + request, n_periods, frequency, dtype, op ): + request.applymarker( + pytest.mark.xfail( + PANDAS_GE_220 + and dtype in {"datetime64[ms]", "datetime64[s]"} + and frequency == "microseconds" + and n_periods == 0, + reason="https://github.com/pandas-dev/pandas/issues/57448", + ) + ) + date_col = [ + "2000-01-01 00:00:00.012345678", + "2000-01-31 00:00:00.012345678", + "2000-02-29 00:00:00.012345678", + ] gsr = cudf.Series(date_col, dtype=dtype) psr = gsr.to_pandas() From d1090c430ac904ceaf228d5fdab2e4a58210ec0c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 19 Feb 2024 15:20:33 -0800 Subject: [PATCH 3/3] xfail for pandas 2.1 bug --- python/cudf/cudf/tests/test_binops.py | 53 +++++++++++++++++++++------ 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index bc92303f9b6..6c6dae9e22e 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1726,6 +1726,24 @@ def test_datetime_dateoffset_binaryop( reason="https://github.com/pandas-dev/pandas/issues/57448", ) ) + request.applymarker( + pytest.mark.xfail( + not PANDAS_GE_220 + and dtype in {"datetime64[ms]", "datetime64[s]"} + and frequency in ("microseconds", "nanoseconds") + and n_periods != 0, + reason="https://github.com/pandas-dev/pandas/pull/55595", + ) + ) + request.applymarker( + pytest.mark.xfail( + not PANDAS_GE_220 + and dtype == "datetime64[us]" + and frequency == "nanoseconds" + and n_periods != 0, + reason="https://github.com/pandas-dev/pandas/pull/55595", + ) + ) date_col = [ "2000-01-01 00:00:00.012345678", "2000-01-31 00:00:00.012345678", @@ -1791,16 +1809,6 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op): utils.assert_eq(expect, got) -@pytest.mark.parametrize( - "date_col", - [ - [ - "2000-01-01 00:00:00.012345678", - "2000-01-31 00:00:00.012345678", - "2000-02-29 00:00:00.012345678", - ] - ], -) @pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12]) @pytest.mark.parametrize( "frequency", @@ -1820,8 +1828,31 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op): ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"], ) def test_datetime_dateoffset_binaryop_reflected( - date_col, n_periods, frequency, dtype + request, n_periods, frequency, dtype ): + request.applymarker( + pytest.mark.xfail( + not PANDAS_GE_220 + and dtype in {"datetime64[ms]", "datetime64[s]"} + and frequency in ("microseconds", "nanoseconds") + and n_periods != 0, + reason="https://github.com/pandas-dev/pandas/pull/55595", + ) + ) + request.applymarker( + pytest.mark.xfail( + not PANDAS_GE_220 + and dtype == "datetime64[us]" + and frequency == "nanoseconds" + and n_periods != 0, + reason="https://github.com/pandas-dev/pandas/pull/55595", + ) + ) + date_col = [ + "2000-01-01 00:00:00.012345678", + "2000-01-31 00:00:00.012345678", + "2000-02-29 00:00:00.012345678", + ] gsr = cudf.Series(date_col, dtype=dtype) psr = gsr.to_pandas() # converts to nanos