From e01e9c6acecd822adf86dfc1e1acea391daf4d5e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 19 Dec 2022 15:07:38 -0800 Subject: [PATCH 1/3] Use applymarker to only xfail the cases that should actually fail. --- python/cudf/cudf/tests/test_dataframe.py | 28 ++++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 823abbafd7d..0fadfe63c65 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -2349,17 +2349,6 @@ def test_comparison_binops_df(pdf, gdf, binop, other): @pytest_unmark_spilling -@pytest.mark.xfail( - reason=""" - Currently we will not match pandas for equality/inequality operators when - there are columns that exist in a Series but not the DataFrame because - pandas returns True/False values whereas we return NA. However, this - reindexing is deprecated in pandas so we opt not to add support. This test - should start passing once pandas removes the deprecated behavior in 2.0. - When that happens, this test can be merged with the two tests above into a - single test with common parameters. - """ -) @pytest.mark.parametrize( "binop", [ @@ -2381,7 +2370,7 @@ def test_comparison_binops_df(pdf, gdf, binop, other): pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"]), ], ) -def test_comparison_binops_df_reindexing(pdf, gdf, binop, other): +def test_comparison_binops_df_reindexing(request, pdf, gdf, binop, other): # Avoid 1**NA cases: https://github.com/pandas-dev/pandas/issues/29997 pdf[pdf == 1.0] = 2 gdf[gdf == 1.0] = 2 @@ -2401,6 +2390,21 @@ def test_comparison_binops_df_reindexing(pdf, gdf, binop, other): compare_error_message=False, ) else: + request.applymarker( + pytest.mark.xfail( + reason=""" + Currently we will not match pandas for equality/inequality + operators when there are columns that exist in a Series but not + the DataFrame because pandas returns True/False values whereas + we return NA. However, this reindexing is deprecated in pandas + so we opt not to add support. This test should start passing + once pandas removes the deprecated behavior in 2.0. When that + happens, this test can be merged with the two tests above into + a single test with common parameters. + """ + ) + ) + if isinstance(other, (pd.Series, pd.DataFrame)): other = cudf.from_pandas(other) g = binop(gdf, other) From 451115c970e433b88d7fb8c49337c973b73e598e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 19 Dec 2022 15:11:48 -0800 Subject: [PATCH 2/3] Remove outdated comment. --- python/cudf/cudf/tests/test_dataframe.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 0fadfe63c65..4eb42af45e9 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -5185,11 +5185,6 @@ def test_rowwise_ops_datetime_dtypes(data, op, skipna): pdf = gdf.to_pandas() - # TODO: This behavior seems erroneous in pandas. Why is the min/max over - # a mix of datetime and numeric dtypes not just throwing an error? This - # test will have to be rewritten anyway in pandas 2.0 when the implicit - # numeric_only behavior changes, at which point the dtype mixing should be - # reconsidered as well. with expect_warning_if( not all(cudf.api.types.is_datetime64_dtype(dt) for dt in gdf.dtypes), UserWarning, From 9eedf971b8d637f9344263ed5fffb3f915fe3b51 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 19 Dec 2022 15:40:59 -0800 Subject: [PATCH 3/3] Make the xfail conditional. --- python/cudf/cudf/tests/test_dataframe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 4eb42af45e9..cb9345444f9 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -2392,6 +2392,7 @@ def test_comparison_binops_df_reindexing(request, pdf, gdf, binop, other): else: request.applymarker( pytest.mark.xfail( + condition=pdf.columns.difference(other.index).size > 0, reason=""" Currently we will not match pandas for equality/inequality operators when there are columns that exist in a Series but not @@ -2401,7 +2402,7 @@ def test_comparison_binops_df_reindexing(request, pdf, gdf, binop, other): once pandas removes the deprecated behavior in 2.0. When that happens, this test can be merged with the two tests above into a single test with common parameters. - """ + """, ) )