From 3e4f95fbc61d71f132ba12f8d9169f36a53d91ba Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 18 Nov 2022 14:16:47 +0100 Subject: [PATCH 1/2] reduce number of tests marked "spilling" --- python/cudf/cudf/tests/test_binops.py | 231 ++++++++++++----------- python/cudf/cudf/tests/test_dataframe.py | 39 ++++ python/cudf/cudf/tests/test_reshape.py | 9 + 3 files changed, 170 insertions(+), 109 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 6b720c3ad5c..571a0056636 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -14,6 +14,7 @@ import cudf from cudf import Series from cudf.core._compat import PANDAS_GE_150 +from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.index import as_index from cudf.testing import _utils as utils from cudf.utils.dtypes import ( @@ -27,7 +28,6 @@ STRING_TYPES = {"str"} - _binops = [ operator.add, operator.sub, @@ -47,6 +47,127 @@ operator.ge, ] +_bitwise_binops = [operator.and_, operator.or_, operator.xor] + +_int_types = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", +] + +_cmpops = [ + operator.lt, + operator.gt, + operator.le, + operator.ge, + operator.eq, + operator.ne, +] + +_reflected_ops = [ + lambda x: 1 + x, + lambda x: 2 * x, + lambda x: 2 - x, + lambda x: 2 // x, + lambda x: 2 / x, + lambda x: 3 + x, + lambda x: 3 * x, + lambda x: 3 - x, + lambda x: 3 // x, + lambda x: 3 / x, + lambda x: 3 % x, + lambda x: -1 + x, + lambda x: -2 * x, + lambda x: -2 - x, + lambda x: -2 // x, + lambda x: -2 / x, + lambda x: -3 + x, + lambda x: -3 * x, + lambda x: -3 - x, + lambda x: -3 // x, + lambda x: -3 / x, + lambda x: -3 % x, + lambda x: 0 + x, + lambda x: 0 * x, + lambda x: 0 - x, + lambda x: 0 // x, + lambda x: 0 / x, +] + +_operators_arithmetic = [ + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "mod", + "rmod", + "pow", + "rpow", + "div", + "divide", + "floordiv", + "rfloordiv", + "truediv", + "rtruediv", +] + +_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"] + + +_cudf_scalar_reflected_ops = [ + lambda x: cudf.Scalar(1) + x, + lambda x: cudf.Scalar(2) * x, + lambda x: cudf.Scalar(2) - x, + lambda x: cudf.Scalar(2) // x, + lambda x: cudf.Scalar(2) / x, + lambda x: cudf.Scalar(3) + x, + lambda x: cudf.Scalar(3) * x, + lambda x: cudf.Scalar(3) - x, + lambda x: cudf.Scalar(3) // x, + lambda x: cudf.Scalar(3) / x, + lambda x: cudf.Scalar(3) % x, + lambda x: cudf.Scalar(-1) + x, + lambda x: cudf.Scalar(-2) * x, + lambda x: cudf.Scalar(-2) - x, + lambda x: cudf.Scalar(-2) // x, + lambda x: cudf.Scalar(-2) / x, + lambda x: cudf.Scalar(-3) + x, + lambda x: cudf.Scalar(-3) * x, + lambda x: cudf.Scalar(-3) - x, + lambda x: cudf.Scalar(-3) // x, + lambda x: cudf.Scalar(-3) / x, + lambda x: cudf.Scalar(-3) % x, + lambda x: cudf.Scalar(0) + x, + lambda x: cudf.Scalar(0) * x, + lambda x: cudf.Scalar(0) - x, + lambda x: cudf.Scalar(0) // x, + lambda x: cudf.Scalar(0) / x, +] + +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + _binops = _binops[:1] + _binops_compare = _binops_compare[:1] + _int_types = _int_types[-1:] + _cmpops = _cmpops[:1] + _reflected_ops = _reflected_ops[:1] + _operators_arithmetic = _operators_arithmetic[:1] + _operators_comparison = _operators_comparison[:1] + _cudf_scalar_reflected_ops = _cudf_scalar_reflected_ops[:1] + DATETIME_TYPES = {"datetime64[ms]"} # noqa: F811 + NUMERIC_TYPES = {"float32"} # noqa: F811 + FLOAT_TYPES = {"float64"} # noqa: F811 + INTEGER_TYPES = {"int16"} # noqa: F811 + TIMEDELTA_TYPES = {"timedelta64[s]"} # noqa: F811 + + pytestmark = pytest.mark.spilling @@ -114,20 +235,6 @@ def test_series_binop_scalar(nelem, binop, obj_class, use_cudf_scalar): np.testing.assert_almost_equal(result.to_numpy(), binop(arr, rhs)) -_bitwise_binops = [operator.and_, operator.or_, operator.xor] - - -_int_types = [ - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("binop", _bitwise_binops) @pytest.mark.parametrize( @@ -152,16 +259,6 @@ def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype): np.testing.assert_almost_equal(result.to_numpy(), binop(arr1, arr2)) -_cmpops = [ - operator.lt, - operator.gt, - operator.le, - operator.ge, - operator.eq, - operator.ne, -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("cmpop", _cmpops) @pytest.mark.parametrize( @@ -385,37 +482,6 @@ def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class): np.testing.assert_array_equal(result.to_numpy(), cmpop(lhs, rhs)) -_reflected_ops = [ - lambda x: 1 + x, - lambda x: 2 * x, - lambda x: 2 - x, - lambda x: 2 // x, - lambda x: 2 / x, - lambda x: 3 + x, - lambda x: 3 * x, - lambda x: 3 - x, - lambda x: 3 // x, - lambda x: 3 / x, - lambda x: 3 % x, - lambda x: -1 + x, - lambda x: -2 * x, - lambda x: -2 - x, - lambda x: -2 // x, - lambda x: -2 / x, - lambda x: -3 + x, - lambda x: -3 * x, - lambda x: -3 - x, - lambda x: -3 // x, - lambda x: -3 / x, - lambda x: -3 % x, - lambda x: 0 + x, - lambda x: 0 * x, - lambda x: 0 - x, - lambda x: 0 // x, - lambda x: 0 / x, -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize( "func, dtype", list(product(_reflected_ops, utils.NUMERIC_TYPES)) @@ -458,37 +524,6 @@ def test_cudf_scalar_reflected_ops_scalar(func, dtype): assert np.isclose(expected, actual) -_cudf_scalar_reflected_ops = [ - lambda x: cudf.Scalar(1) + x, - lambda x: cudf.Scalar(2) * x, - lambda x: cudf.Scalar(2) - x, - lambda x: cudf.Scalar(2) // x, - lambda x: cudf.Scalar(2) / x, - lambda x: cudf.Scalar(3) + x, - lambda x: cudf.Scalar(3) * x, - lambda x: cudf.Scalar(3) - x, - lambda x: cudf.Scalar(3) // x, - lambda x: cudf.Scalar(3) / x, - lambda x: cudf.Scalar(3) % x, - lambda x: cudf.Scalar(-1) + x, - lambda x: cudf.Scalar(-2) * x, - lambda x: cudf.Scalar(-2) - x, - lambda x: cudf.Scalar(-2) // x, - lambda x: cudf.Scalar(-2) / x, - lambda x: cudf.Scalar(-3) + x, - lambda x: cudf.Scalar(-3) * x, - lambda x: cudf.Scalar(-3) - x, - lambda x: cudf.Scalar(-3) // x, - lambda x: cudf.Scalar(-3) / x, - lambda x: cudf.Scalar(-3) % x, - lambda x: cudf.Scalar(0) + x, - lambda x: cudf.Scalar(0) * x, - lambda x: cudf.Scalar(0) - x, - lambda x: cudf.Scalar(0) // x, - lambda x: cudf.Scalar(0) / x, -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize( "funcs, dtype", @@ -652,28 +687,6 @@ def test_boolean_scalar_binop(op): utils.assert_eq(op(psr, False), op(gsr, cudf.Scalar(False))) -_operators_arithmetic = [ - "add", - "radd", - "sub", - "rsub", - "mul", - "rmul", - "mod", - "rmod", - "pow", - "rpow", - "div", - "divide", - "floordiv", - "rfloordiv", - "truediv", - "rtruediv", -] - -_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"] - - @pytest.mark.parametrize("func", _operators_arithmetic) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("fill_value", [None, 27]) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 105f86df22e..1ab0013418a 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -28,6 +28,7 @@ PANDAS_GE_134, PANDAS_LT_140, ) +from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.testing import _utils as utils from cudf.testing._utils import ( @@ -40,8 +41,20 @@ gen_rand, ) +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + ALL_TYPES = ["float32"] # noqa: F811 + DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 + NUMERIC_TYPES = ["float32"] # noqa: F811 + pytestmark = pytest.mark.spilling +# Use this to "unmark" the module level spilling mark +pytest_unmark_spilling = pytest.mark.skipif( + get_global_manager() is not None, reason="unmarked spilling" +) + def test_init_via_list_of_tuples(): data = [ @@ -2067,6 +2080,7 @@ def gdf(pdf): return cudf.DataFrame.from_pandas(pdf) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -2214,6 +2228,7 @@ def _hide_host_other_warning(other): yield +@pytest_unmark_spilling @pytest.mark.parametrize( "binop", [ @@ -2304,6 +2319,7 @@ def test_bitwise_binops_df(pdf, gdf, binop): assert_eq(d, g) +@pytest_unmark_spilling @pytest.mark.parametrize( "binop", [ @@ -2761,6 +2777,7 @@ def test_tail_for_string(): assert_eq(gdf.tail(3), gdf.to_pandas().tail(3)) +@pytest_unmark_spilling @pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]]) @pytest.mark.parametrize("drop", [True, False]) @pytest.mark.parametrize( @@ -2804,6 +2821,7 @@ def test_reset_index(level, drop, column_names, inplace, col_level, col_fill): assert_eq(expect, got) +@pytest_unmark_spilling @pytest.mark.parametrize("level", [None, 0, 1, [None]]) @pytest.mark.parametrize("drop", [False, True]) @pytest.mark.parametrize("inplace", [False, True]) @@ -3022,6 +3040,7 @@ def reindex_data_numeric(): ) +@pytest_unmark_spilling @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize( "args,gd_kwargs", @@ -3178,6 +3197,7 @@ def test_dataframe_empty_sort_index(): assert_eq(expect, got, check_index_type=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "index", [ @@ -3235,6 +3255,7 @@ def test_dataframe_sort_index( assert_eq(expected, got, check_index_type=True) +@pytest_unmark_spilling @pytest.mark.parametrize("axis", [0, 1, "index", "columns"]) @pytest.mark.parametrize( "level", @@ -3872,6 +3893,7 @@ def test_empty_dataframe_any(axis): assert_eq(got, expected, check_index_type=False) +@pytest_unmark_spilling @pytest.mark.parametrize("a", [[], ["123"]]) @pytest.mark.parametrize("b", ["123", ["123"]]) @pytest.mark.parametrize( @@ -4734,6 +4756,7 @@ def test_df_constructor_dtype(dtype): assert_eq(expect, got) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -5200,6 +5223,7 @@ def test_cov_nans(): assert_eq(pdf.cov(), gdf.cov()) +@pytest_unmark_spilling @pytest.mark.parametrize( "gsr", [ @@ -5252,6 +5276,7 @@ def test_df_sr_binop(gsr, colnames, op): assert_eq(expect, got, check_dtype=False) +@pytest_unmark_spilling @pytest.mark.parametrize( "op", [ @@ -6249,6 +6274,7 @@ def test_dataframe_init_from_arrays_cols(data, cols, index): assert_eq(pdf, gdf, check_dtype=False) +@pytest_unmark_spilling @pytest.mark.parametrize( "col_data", [ @@ -6292,6 +6318,7 @@ def test_dataframe_assign_scalar(col_data, assign_val): assert_eq(pdf, gdf) +@pytest_unmark_spilling @pytest.mark.parametrize( "col_data", [ @@ -6587,6 +6614,7 @@ def test_dataframe_info_null_counts(): assert str_cmp == actual_string +@pytest_unmark_spilling @pytest.mark.parametrize( "data1", [ @@ -7064,6 +7092,7 @@ def test_series_keys(ps): assert_eq(ps.keys(), gds.keys()) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7144,6 +7173,7 @@ def test_dataframe_append_dataframe(df, other, sort, ignore_index): assert_eq(expected, actual, check_index_type=not gdf.empty) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7227,6 +7257,7 @@ def test_dataframe_append_series_mixed_index(): df.append(sr, ignore_index=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7396,6 +7427,7 @@ def test_dataframe_ffill(df): assert_eq(expected, actual) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7746,6 +7778,7 @@ def test_dataframe_init_with_columns(data, columns): ) +@pytest_unmark_spilling @pytest.mark.parametrize( "data, ignore_dtype", [ @@ -7825,6 +7858,7 @@ def test_dataframe_init_from_series_list(data, ignore_dtype, columns): assert_eq(expected, actual, check_index_type=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "data, ignore_dtype, index", [ @@ -7995,6 +8029,7 @@ def test_dataframe_iterrows_itertuples(): df.iterrows() +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -8072,6 +8107,7 @@ def test_describe_misc_include(df, include): assert_eq(expected, actual) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -8554,6 +8590,7 @@ def test_dataframe_constructor_column_index_only(): ) == id(gdf["c"]._column) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -8655,6 +8692,7 @@ def test_agg_for_dataframe_with_string_columns(aggs): gdf.agg(aggs) +@pytest_unmark_spilling @pytest.mark.parametrize( "join", ["left"], @@ -9469,6 +9507,7 @@ def test_dataframe_rename_duplicate_column(): gdf.rename(columns={"a": "b"}, inplace=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 280b619c305..7942b2e19f7 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -9,6 +9,7 @@ import cudf from cudf import melt as cudf_melt from cudf.core._compat import PANDAS_GE_120 +from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, @@ -16,6 +17,14 @@ assert_eq, ) +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + ALL_TYPES = ["float32"] # noqa: F811 + DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 + NUMERIC_TYPES = ["float32"] # noqa: F811 + + pytestmark = pytest.mark.spilling From fb32d6954b285b03852976a1997bba0413625cb5 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 18 Nov 2022 14:28:00 +0100 Subject: [PATCH 2/2] skipping xfail --- python/cudf/cudf/tests/test_binops.py | 28 ++++---- python/cudf/cudf/tests/test_dataframe.py | 89 ++++++++++++------------ python/cudf/cudf/tests/test_reshape.py | 24 ++++--- 3 files changed, 74 insertions(+), 67 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 571a0056636..5dfb962a4bb 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -150,6 +150,9 @@ lambda x: cudf.Scalar(0) / x, ] +pytest_xfail = pytest.mark.xfail +pytestmark = pytest.mark.spilling + # If spilling is enabled globally, we skip many test permutations # to reduce running time. if get_global_manager() is not None: @@ -166,9 +169,8 @@ FLOAT_TYPES = {"float64"} # noqa: F811 INTEGER_TYPES = {"int16"} # noqa: F811 TIMEDELTA_TYPES = {"timedelta64[s]"} # noqa: F811 - - -pytestmark = pytest.mark.spilling + # To save time, we skip tests marked "pytest.mark.xfail" + pytest_xfail = pytest.mark.skipif @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @@ -900,7 +902,7 @@ def test_binop_bool_uint(func, rhs): ( pytest.param( np.bool_, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason=( "Pandas handling of division by zero-bool is too strange" ) @@ -931,7 +933,7 @@ def test_floordiv_zero_float64(series_dtype, divisor_dtype, scalar_divisor): ( pytest.param( np.bool_, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason=( "Pandas handling of division by zero-bool is too strange" ) @@ -1637,7 +1639,7 @@ def test_scalar_null_binops(op, dtype_l, dtype_r): "microseconds", pytest.param( "nanoseconds", - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=not PANDAS_GE_150, reason="https://github.com/pandas-dev/pandas/issues/36589", ), @@ -1689,19 +1691,19 @@ def test_datetime_dateoffset_binaryop( {"months": 2, "years": 5, "seconds": 923, "microseconds": 481}, pytest.param( {"milliseconds": 4}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas gets the wrong answer for milliseconds" ), ), pytest.param( {"milliseconds": 4, "years": 2}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas construction fails with these keywords" ), ), pytest.param( {"nanoseconds": 12}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas gets the wrong answer for nanoseconds" ), ), @@ -1745,7 +1747,7 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op): "microseconds", pytest.param( "nanoseconds", - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=not PANDAS_GE_150, reason="https://github.com/pandas-dev/pandas/issues/36589", ), @@ -2767,7 +2769,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): ), ], ) -@pytest.mark.xfail( +@pytest_xfail( reason="binop operations not supported for different " "bit-width decimal types" ) @@ -2941,7 +2943,7 @@ def decimal_series(input, dtype): ], ) @pytest.mark.parametrize("reflected", [True, False]) -@pytest.mark.xfail( +@pytest_xfail( reason="binop operations not supported for different bit-width " "decimal types" ) @@ -3130,7 +3132,7 @@ def test_empty_column(binop, data, scalar): cudf.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]), pytest.param( cudf.DataFrame([[1, None, None, 4], [5, 6, 7, None]]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Cannot access Frame.values if frame contains nulls" ), ), diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 1ab0013418a..f6716ece95b 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -41,13 +41,7 @@ gen_rand, ) -# If spilling is enabled globally, we skip many test permutations -# to reduce running time. -if get_global_manager() is not None: - ALL_TYPES = ["float32"] # noqa: F811 - DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 - NUMERIC_TYPES = ["float32"] # noqa: F811 - +pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling # Use this to "unmark" the module level spilling mark @@ -55,6 +49,15 @@ get_global_manager() is not None, reason="unmarked spilling" ) +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + ALL_TYPES = ["float32"] # noqa: F811 + DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 + NUMERIC_TYPES = ["float32"] # noqa: F811 + # To save time, we skip tests marked "xfail" + pytest_xfail = pytest.mark.skipif + def test_init_via_list_of_tuples(): data = [ @@ -276,19 +279,19 @@ def test_append_index(a, b): {1: ["a", np.nan, "c"], 2: ["q", None, "u"]}, pytest.param( {}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/11080" ), ), pytest.param( {1: [], 2: [], 3: []}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/11080" ), ), pytest.param( [1, 2, 3], - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/11080" ), ), @@ -2097,7 +2100,7 @@ def gdf(pdf): }, pytest.param( {"x": [], "y": [], "z": []}, - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=version.parse("11") <= version.parse(cupy.__version__) < version.parse("11.1"), @@ -2107,7 +2110,7 @@ def gdf(pdf): ), pytest.param( {"x": []}, - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=version.parse("11") <= version.parse(cupy.__version__) < version.parse("11.1"), @@ -2625,7 +2628,7 @@ def test_dataframe_boolmask(mask_shape): [True, False, True], pytest.param( cudf.Series([True, False, True]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas can't index a multiindex with a Series" ), ), @@ -2982,7 +2985,7 @@ def test_set_index(data, index, drop, append, inplace): ) @pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])]) @pytest.mark.parametrize("verify_integrity", [True]) -@pytest.mark.xfail +@pytest_xfail def test_set_index_verify_integrity(data, index, verify_integrity): gdf = cudf.DataFrame(data) gdf.set_index(index, verify_integrity=verify_integrity) @@ -3212,7 +3215,7 @@ def test_dataframe_empty_sort_index(): pytest.param( pd.RangeIndex(2, -1, -1), marks=[ - pytest.mark.xfail( + pytest_xfail( condition=PANDAS_LT_140, reason="https://github.com/pandas-dev/pandas/issues/43591", ) @@ -3793,7 +3796,7 @@ def test_dataframe_round(decimals): pytest.param( [["a", True], ["b", False], ["c", False]], marks=[ - pytest.mark.xfail( + pytest_xfail( reason="NotImplementedError: all does not " "support columns of object dtype." ) @@ -3845,7 +3848,7 @@ def test_all(data): pytest.param( [["a", True], ["b", False], ["c", False]], marks=[ - pytest.mark.xfail( + pytest_xfail( reason="NotImplementedError: any does not " "support columns of object dtype." ) @@ -4341,11 +4344,11 @@ def test_series_values_host_property(data): [5.0, 7.0, 8.0], pytest.param( pd.Categorical(["a", "b", "c"]), - marks=pytest.mark.xfail(raises=NotImplementedError), + marks=pytest_xfail(raises=NotImplementedError), ), pytest.param( ["m", "a", "d", "v"], - marks=pytest.mark.xfail(raises=TypeError), + marks=pytest_xfail(raises=TypeError), ), ], ) @@ -4366,26 +4369,26 @@ def test_series_values_property(data): {"A": np.float32(np.arange(3)), "B": np.float64(np.arange(3))}, pytest.param( {"A": [1, None, 3], "B": [1, 2, None]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Nulls not supported by values accessor" ), ), pytest.param( {"A": [None, None, None], "B": [None, None, None]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Nulls not supported by values accessor" ), ), {"A": [], "B": []}, pytest.param( {"A": [1, 2, 3], "B": ["a", "b", "c"]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="str or categorical not supported by values accessor" ), ), pytest.param( {"A": pd.Categorical(["a", "b", "c"]), "B": ["d", "e", "f"]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="str or categorical not supported by values accessor" ), ), @@ -4721,9 +4724,9 @@ def test_empty_df_astype(dtype, args): "errors", [ pytest.param( - "raise", marks=pytest.mark.xfail(reason="should raise error here") + "raise", marks=pytest_xfail(reason="should raise error here") ), - pytest.param("other", marks=pytest.mark.xfail(raises=ValueError)), + pytest.param("other", marks=pytest_xfail(raises=ValueError)), "ignore", ], ) @@ -5211,7 +5214,7 @@ def test_cov(): assert_eq(pdf.cov(), gdf.cov()) -@pytest.mark.xfail(reason="cupy-based cov does not support nulls") +@pytest_xfail(reason="cupy-based cov does not support nulls") def test_cov_nans(): pdf = pd.DataFrame() pdf["a"] = [None, None, None, 2.00758632, None] @@ -5234,7 +5237,7 @@ def test_cov_nans(): cudf.Series([4, 2, 3], index=cudf.core.index.RangeIndex(0, 3)), pytest.param( cudf.Series([4, 2, 3, 4, 5], index=["a", "b", "d", "0", "12"]), - marks=pytest.mark.xfail, + marks=pytest_xfail, ), ], ) @@ -5288,12 +5291,12 @@ def test_df_sr_binop(gsr, colnames, op): operator.pow, # comparison ops will temporarily XFAIL # see PR https://github.com/rapidsai/cudf/pull/7491 - pytest.param(operator.eq, marks=pytest.mark.xfail()), - pytest.param(operator.lt, marks=pytest.mark.xfail()), - pytest.param(operator.le, marks=pytest.mark.xfail()), - pytest.param(operator.gt, marks=pytest.mark.xfail()), - pytest.param(operator.ge, marks=pytest.mark.xfail()), - pytest.param(operator.ne, marks=pytest.mark.xfail()), + pytest.param(operator.eq, marks=pytest_xfail()), + pytest.param(operator.lt, marks=pytest_xfail()), + pytest.param(operator.le, marks=pytest_xfail()), + pytest.param(operator.gt, marks=pytest_xfail()), + pytest.param(operator.ge, marks=pytest_xfail()), + pytest.param(operator.ne, marks=pytest_xfail()), ], ) @pytest.mark.parametrize( @@ -5355,7 +5358,7 @@ def test_memory_usage(deep, index, set_index): ) -@pytest.mark.xfail +@pytest_xfail def test_memory_usage_string(): rows = int(100) df = pd.DataFrame( @@ -7204,7 +7207,7 @@ def test_dataframe_append_dataframe(df, other, sort, ignore_index): pd.Series([10, 11, 23, 234, 13]), pytest.param( pd.Series([10, 11, 23, 234, 13], index=[11, 12, 13, 44, 33]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="pandas bug: " "https://github.com/pandas-dev/pandas/issues/35092" ), @@ -8062,7 +8065,7 @@ def test_dataframe_iterrows_itertuples(): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8083,7 +8086,7 @@ def test_dataframe_iterrows_itertuples(): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8140,7 +8143,7 @@ def test_describe_misc_include(df, include): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8161,7 +8164,7 @@ def test_describe_misc_include(df, include): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8920,7 +8923,7 @@ def test_rename_for_level_RangeIndex_dataframe(): assert_eq(expect, got) -@pytest.mark.xfail(reason="level=None not implemented yet") +@pytest_xfail(reason="level=None not implemented yet") def test_rename_for_level_is_None_MC(): gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) @@ -9353,7 +9356,7 @@ def test_groupby_cov_positive_semidefinite_matrix(): ) -@pytest.mark.xfail +@pytest_xfail def test_groupby_cov_for_pandas_bug_case(): # Handles case: pandas bug using ddof with missing data. # Filed an issue in Pandas on GH, link below: @@ -9750,14 +9753,14 @@ def test_multiindex_wildcard_selection_all(wildcard_df): assert_eq(expect, got) -@pytest.mark.xfail(reason="Not yet properly supported.") +@pytest_xfail(reason="Not yet properly supported.") def test_multiindex_wildcard_selection_partial(wildcard_df): expect = wildcard_df.to_pandas().loc[:, (slice("a", "b"), "b")] got = wildcard_df.loc[:, (slice("a", "b"), "b")] assert_eq(expect, got) -@pytest.mark.xfail(reason="Not yet properly supported.") +@pytest_xfail(reason="Not yet properly supported.") def test_multiindex_wildcard_selection_three_level_all(): midx = cudf.MultiIndex.from_tuples( [(c1, c2, c3) for c1 in "abcd" for c2 in "abc" for c3 in "ab"] diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 7942b2e19f7..6336565af52 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -17,15 +17,17 @@ assert_eq, ) +pytest_xfail = pytest.mark.xfail +pytestmark = pytest.mark.spilling + # If spilling is enabled globally, we skip many test permutations # to reduce running time. if get_global_manager() is not None: ALL_TYPES = ["float32"] # noqa: F811 DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 NUMERIC_TYPES = ["float32"] # noqa: F811 - - -pytestmark = pytest.mark.spilling + # To save time, we skip tests marked "pytest.mark.xfail" + pytest_xfail = pytest.mark.skipif @pytest.mark.parametrize("num_id_vars", [0, 1, 2]) @@ -89,7 +91,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): + [ pytest.param( "str", - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=not PANDAS_GE_120, reason="pandas bug" ), ) @@ -452,7 +454,7 @@ def test_pivot_values(values): 0, pytest.param( 1, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), @@ -460,7 +462,7 @@ def test_pivot_values(values): "foo", pytest.param( "bar", - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), @@ -468,24 +470,24 @@ def test_pivot_values(values): [], pytest.param( [0, 1], - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), ["foo"], pytest.param( ["foo", "bar"], - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), pytest.param( [0, 1, 2], - marks=pytest.mark.xfail(reason="Pandas behaviour unclear"), + marks=pytest_xfail(reason="Pandas behaviour unclear"), ), pytest.param( ["foo", "bar", "baz"], - marks=pytest.mark.xfail(reason="Pandas behaviour unclear"), + marks=pytest_xfail(reason="Pandas behaviour unclear"), ), ], ) @@ -517,7 +519,7 @@ def test_unstack_multiindex(level): pd.Index(range(0, 5), name="row_index"), pytest.param( pd.CategoricalIndex(["d", "e", "f", "g", "h"]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ),