From a2f69e4a1605f2fe20601e47831c828a519d1b73 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 18 Nov 2022 18:05:38 +0100 Subject: [PATCH] Reduce number of tests marked `spilling` (#12197) To save CI running time, this PR reduce the tests marked `spilling` drastically. An alternative to https://github.com/rapidsai/cudf/pull/12187 Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - https://github.com/brandon-b-miller - GALI PREM SAGAR (https://github.com/galipremsagar) --- python/cudf/cudf/tests/test_binops.py | 253 ++++++++++++----------- python/cudf/cudf/tests/test_dataframe.py | 114 ++++++---- python/cudf/cudf/tests/test_reshape.py | 27 ++- 3 files changed, 231 insertions(+), 163 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 6b720c3ad5c..5dfb962a4bb 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -14,6 +14,7 @@ import cudf from cudf import Series from cudf.core._compat import PANDAS_GE_150 +from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.index import as_index from cudf.testing import _utils as utils from cudf.utils.dtypes import ( @@ -27,7 +28,6 @@ STRING_TYPES = {"str"} - _binops = [ operator.add, operator.sub, @@ -47,8 +47,131 @@ operator.ge, ] +_bitwise_binops = [operator.and_, operator.or_, operator.xor] + +_int_types = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", +] + +_cmpops = [ + operator.lt, + operator.gt, + operator.le, + operator.ge, + operator.eq, + operator.ne, +] + +_reflected_ops = [ + lambda x: 1 + x, + lambda x: 2 * x, + lambda x: 2 - x, + lambda x: 2 // x, + lambda x: 2 / x, + lambda x: 3 + x, + lambda x: 3 * x, + lambda x: 3 - x, + lambda x: 3 // x, + lambda x: 3 / x, + lambda x: 3 % x, + lambda x: -1 + x, + lambda x: -2 * x, + lambda x: -2 - x, + lambda x: -2 // x, + lambda x: -2 / x, + lambda x: -3 + x, + lambda x: -3 * x, + lambda x: -3 - x, + lambda x: -3 // x, + lambda x: -3 / x, + lambda x: -3 % x, + lambda x: 0 + x, + lambda x: 0 * x, + lambda x: 0 - x, + lambda x: 0 // x, + lambda x: 0 / x, +] + +_operators_arithmetic = [ + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "mod", + "rmod", + "pow", + "rpow", + "div", + "divide", + "floordiv", + "rfloordiv", + "truediv", + "rtruediv", +] + +_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"] + + +_cudf_scalar_reflected_ops = [ + lambda x: cudf.Scalar(1) + x, + lambda x: cudf.Scalar(2) * x, + lambda x: cudf.Scalar(2) - x, + lambda x: cudf.Scalar(2) // x, + lambda x: cudf.Scalar(2) / x, + lambda x: cudf.Scalar(3) + x, + lambda x: cudf.Scalar(3) * x, + lambda x: cudf.Scalar(3) - x, + lambda x: cudf.Scalar(3) // x, + lambda x: cudf.Scalar(3) / x, + lambda x: cudf.Scalar(3) % x, + lambda x: cudf.Scalar(-1) + x, + lambda x: cudf.Scalar(-2) * x, + lambda x: cudf.Scalar(-2) - x, + lambda x: cudf.Scalar(-2) // x, + lambda x: cudf.Scalar(-2) / x, + lambda x: cudf.Scalar(-3) + x, + lambda x: cudf.Scalar(-3) * x, + lambda x: cudf.Scalar(-3) - x, + lambda x: cudf.Scalar(-3) // x, + lambda x: cudf.Scalar(-3) / x, + lambda x: cudf.Scalar(-3) % x, + lambda x: cudf.Scalar(0) + x, + lambda x: cudf.Scalar(0) * x, + lambda x: cudf.Scalar(0) - x, + lambda x: cudf.Scalar(0) // x, + lambda x: cudf.Scalar(0) / x, +] + +pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + _binops = _binops[:1] + _binops_compare = _binops_compare[:1] + _int_types = _int_types[-1:] + _cmpops = _cmpops[:1] + _reflected_ops = _reflected_ops[:1] + _operators_arithmetic = _operators_arithmetic[:1] + _operators_comparison = _operators_comparison[:1] + _cudf_scalar_reflected_ops = _cudf_scalar_reflected_ops[:1] + DATETIME_TYPES = {"datetime64[ms]"} # noqa: F811 + NUMERIC_TYPES = {"float32"} # noqa: F811 + FLOAT_TYPES = {"float64"} # noqa: F811 + INTEGER_TYPES = {"int16"} # noqa: F811 + TIMEDELTA_TYPES = {"timedelta64[s]"} # noqa: F811 + # To save time, we skip tests marked "pytest.mark.xfail" + pytest_xfail = pytest.mark.skipif + @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("binop", _binops) @@ -114,20 +237,6 @@ def test_series_binop_scalar(nelem, binop, obj_class, use_cudf_scalar): np.testing.assert_almost_equal(result.to_numpy(), binop(arr, rhs)) -_bitwise_binops = [operator.and_, operator.or_, operator.xor] - - -_int_types = [ - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("binop", _bitwise_binops) @pytest.mark.parametrize( @@ -152,16 +261,6 @@ def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype): np.testing.assert_almost_equal(result.to_numpy(), binop(arr1, arr2)) -_cmpops = [ - operator.lt, - operator.gt, - operator.le, - operator.ge, - operator.eq, - operator.ne, -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize("cmpop", _cmpops) @pytest.mark.parametrize( @@ -385,37 +484,6 @@ def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class): np.testing.assert_array_equal(result.to_numpy(), cmpop(lhs, rhs)) -_reflected_ops = [ - lambda x: 1 + x, - lambda x: 2 * x, - lambda x: 2 - x, - lambda x: 2 // x, - lambda x: 2 / x, - lambda x: 3 + x, - lambda x: 3 * x, - lambda x: 3 - x, - lambda x: 3 // x, - lambda x: 3 / x, - lambda x: 3 % x, - lambda x: -1 + x, - lambda x: -2 * x, - lambda x: -2 - x, - lambda x: -2 // x, - lambda x: -2 / x, - lambda x: -3 + x, - lambda x: -3 * x, - lambda x: -3 - x, - lambda x: -3 // x, - lambda x: -3 / x, - lambda x: -3 % x, - lambda x: 0 + x, - lambda x: 0 * x, - lambda x: 0 - x, - lambda x: 0 // x, - lambda x: 0 / x, -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize( "func, dtype", list(product(_reflected_ops, utils.NUMERIC_TYPES)) @@ -458,37 +526,6 @@ def test_cudf_scalar_reflected_ops_scalar(func, dtype): assert np.isclose(expected, actual) -_cudf_scalar_reflected_ops = [ - lambda x: cudf.Scalar(1) + x, - lambda x: cudf.Scalar(2) * x, - lambda x: cudf.Scalar(2) - x, - lambda x: cudf.Scalar(2) // x, - lambda x: cudf.Scalar(2) / x, - lambda x: cudf.Scalar(3) + x, - lambda x: cudf.Scalar(3) * x, - lambda x: cudf.Scalar(3) - x, - lambda x: cudf.Scalar(3) // x, - lambda x: cudf.Scalar(3) / x, - lambda x: cudf.Scalar(3) % x, - lambda x: cudf.Scalar(-1) + x, - lambda x: cudf.Scalar(-2) * x, - lambda x: cudf.Scalar(-2) - x, - lambda x: cudf.Scalar(-2) // x, - lambda x: cudf.Scalar(-2) / x, - lambda x: cudf.Scalar(-3) + x, - lambda x: cudf.Scalar(-3) * x, - lambda x: cudf.Scalar(-3) - x, - lambda x: cudf.Scalar(-3) // x, - lambda x: cudf.Scalar(-3) / x, - lambda x: cudf.Scalar(-3) % x, - lambda x: cudf.Scalar(0) + x, - lambda x: cudf.Scalar(0) * x, - lambda x: cudf.Scalar(0) - x, - lambda x: cudf.Scalar(0) // x, - lambda x: cudf.Scalar(0) / x, -] - - @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @pytest.mark.parametrize( "funcs, dtype", @@ -652,28 +689,6 @@ def test_boolean_scalar_binop(op): utils.assert_eq(op(psr, False), op(gsr, cudf.Scalar(False))) -_operators_arithmetic = [ - "add", - "radd", - "sub", - "rsub", - "mul", - "rmul", - "mod", - "rmod", - "pow", - "rpow", - "div", - "divide", - "floordiv", - "rfloordiv", - "truediv", - "rtruediv", -] - -_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"] - - @pytest.mark.parametrize("func", _operators_arithmetic) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("fill_value", [None, 27]) @@ -887,7 +902,7 @@ def test_binop_bool_uint(func, rhs): ( pytest.param( np.bool_, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason=( "Pandas handling of division by zero-bool is too strange" ) @@ -918,7 +933,7 @@ def test_floordiv_zero_float64(series_dtype, divisor_dtype, scalar_divisor): ( pytest.param( np.bool_, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason=( "Pandas handling of division by zero-bool is too strange" ) @@ -1624,7 +1639,7 @@ def test_scalar_null_binops(op, dtype_l, dtype_r): "microseconds", pytest.param( "nanoseconds", - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=not PANDAS_GE_150, reason="https://github.com/pandas-dev/pandas/issues/36589", ), @@ -1676,19 +1691,19 @@ def test_datetime_dateoffset_binaryop( {"months": 2, "years": 5, "seconds": 923, "microseconds": 481}, pytest.param( {"milliseconds": 4}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas gets the wrong answer for milliseconds" ), ), pytest.param( {"milliseconds": 4, "years": 2}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas construction fails with these keywords" ), ), pytest.param( {"nanoseconds": 12}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas gets the wrong answer for nanoseconds" ), ), @@ -1732,7 +1747,7 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op): "microseconds", pytest.param( "nanoseconds", - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=not PANDAS_GE_150, reason="https://github.com/pandas-dev/pandas/issues/36589", ), @@ -2754,7 +2769,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): ), ], ) -@pytest.mark.xfail( +@pytest_xfail( reason="binop operations not supported for different " "bit-width decimal types" ) @@ -2928,7 +2943,7 @@ def decimal_series(input, dtype): ], ) @pytest.mark.parametrize("reflected", [True, False]) -@pytest.mark.xfail( +@pytest_xfail( reason="binop operations not supported for different bit-width " "decimal types" ) @@ -3117,7 +3132,7 @@ def test_empty_column(binop, data, scalar): cudf.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]), pytest.param( cudf.DataFrame([[1, None, None, 4], [5, 6, 7, None]]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Cannot access Frame.values if frame contains nulls" ), ), diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 105f86df22e..f6716ece95b 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -28,6 +28,7 @@ PANDAS_GE_134, PANDAS_LT_140, ) +from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.testing import _utils as utils from cudf.testing._utils import ( @@ -40,8 +41,23 @@ gen_rand, ) +pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling +# Use this to "unmark" the module level spilling mark +pytest_unmark_spilling = pytest.mark.skipif( + get_global_manager() is not None, reason="unmarked spilling" +) + +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + ALL_TYPES = ["float32"] # noqa: F811 + DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 + NUMERIC_TYPES = ["float32"] # noqa: F811 + # To save time, we skip tests marked "xfail" + pytest_xfail = pytest.mark.skipif + def test_init_via_list_of_tuples(): data = [ @@ -263,19 +279,19 @@ def test_append_index(a, b): {1: ["a", np.nan, "c"], 2: ["q", None, "u"]}, pytest.param( {}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/11080" ), ), pytest.param( {1: [], 2: [], 3: []}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/11080" ), ), pytest.param( [1, 2, 3], - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/11080" ), ), @@ -2067,6 +2083,7 @@ def gdf(pdf): return cudf.DataFrame.from_pandas(pdf) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -2083,7 +2100,7 @@ def gdf(pdf): }, pytest.param( {"x": [], "y": [], "z": []}, - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=version.parse("11") <= version.parse(cupy.__version__) < version.parse("11.1"), @@ -2093,7 +2110,7 @@ def gdf(pdf): ), pytest.param( {"x": []}, - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=version.parse("11") <= version.parse(cupy.__version__) < version.parse("11.1"), @@ -2214,6 +2231,7 @@ def _hide_host_other_warning(other): yield +@pytest_unmark_spilling @pytest.mark.parametrize( "binop", [ @@ -2304,6 +2322,7 @@ def test_bitwise_binops_df(pdf, gdf, binop): assert_eq(d, g) +@pytest_unmark_spilling @pytest.mark.parametrize( "binop", [ @@ -2609,7 +2628,7 @@ def test_dataframe_boolmask(mask_shape): [True, False, True], pytest.param( cudf.Series([True, False, True]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Pandas can't index a multiindex with a Series" ), ), @@ -2761,6 +2780,7 @@ def test_tail_for_string(): assert_eq(gdf.tail(3), gdf.to_pandas().tail(3)) +@pytest_unmark_spilling @pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]]) @pytest.mark.parametrize("drop", [True, False]) @pytest.mark.parametrize( @@ -2804,6 +2824,7 @@ def test_reset_index(level, drop, column_names, inplace, col_level, col_fill): assert_eq(expect, got) +@pytest_unmark_spilling @pytest.mark.parametrize("level", [None, 0, 1, [None]]) @pytest.mark.parametrize("drop", [False, True]) @pytest.mark.parametrize("inplace", [False, True]) @@ -2964,7 +2985,7 @@ def test_set_index(data, index, drop, append, inplace): ) @pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])]) @pytest.mark.parametrize("verify_integrity", [True]) -@pytest.mark.xfail +@pytest_xfail def test_set_index_verify_integrity(data, index, verify_integrity): gdf = cudf.DataFrame(data) gdf.set_index(index, verify_integrity=verify_integrity) @@ -3022,6 +3043,7 @@ def reindex_data_numeric(): ) +@pytest_unmark_spilling @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize( "args,gd_kwargs", @@ -3178,6 +3200,7 @@ def test_dataframe_empty_sort_index(): assert_eq(expect, got, check_index_type=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "index", [ @@ -3192,7 +3215,7 @@ def test_dataframe_empty_sort_index(): pytest.param( pd.RangeIndex(2, -1, -1), marks=[ - pytest.mark.xfail( + pytest_xfail( condition=PANDAS_LT_140, reason="https://github.com/pandas-dev/pandas/issues/43591", ) @@ -3235,6 +3258,7 @@ def test_dataframe_sort_index( assert_eq(expected, got, check_index_type=True) +@pytest_unmark_spilling @pytest.mark.parametrize("axis", [0, 1, "index", "columns"]) @pytest.mark.parametrize( "level", @@ -3772,7 +3796,7 @@ def test_dataframe_round(decimals): pytest.param( [["a", True], ["b", False], ["c", False]], marks=[ - pytest.mark.xfail( + pytest_xfail( reason="NotImplementedError: all does not " "support columns of object dtype." ) @@ -3824,7 +3848,7 @@ def test_all(data): pytest.param( [["a", True], ["b", False], ["c", False]], marks=[ - pytest.mark.xfail( + pytest_xfail( reason="NotImplementedError: any does not " "support columns of object dtype." ) @@ -3872,6 +3896,7 @@ def test_empty_dataframe_any(axis): assert_eq(got, expected, check_index_type=False) +@pytest_unmark_spilling @pytest.mark.parametrize("a", [[], ["123"]]) @pytest.mark.parametrize("b", ["123", ["123"]]) @pytest.mark.parametrize( @@ -4319,11 +4344,11 @@ def test_series_values_host_property(data): [5.0, 7.0, 8.0], pytest.param( pd.Categorical(["a", "b", "c"]), - marks=pytest.mark.xfail(raises=NotImplementedError), + marks=pytest_xfail(raises=NotImplementedError), ), pytest.param( ["m", "a", "d", "v"], - marks=pytest.mark.xfail(raises=TypeError), + marks=pytest_xfail(raises=TypeError), ), ], ) @@ -4344,26 +4369,26 @@ def test_series_values_property(data): {"A": np.float32(np.arange(3)), "B": np.float64(np.arange(3))}, pytest.param( {"A": [1, None, 3], "B": [1, 2, None]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Nulls not supported by values accessor" ), ), pytest.param( {"A": [None, None, None], "B": [None, None, None]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Nulls not supported by values accessor" ), ), {"A": [], "B": []}, pytest.param( {"A": [1, 2, 3], "B": ["a", "b", "c"]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="str or categorical not supported by values accessor" ), ), pytest.param( {"A": pd.Categorical(["a", "b", "c"]), "B": ["d", "e", "f"]}, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="str or categorical not supported by values accessor" ), ), @@ -4699,9 +4724,9 @@ def test_empty_df_astype(dtype, args): "errors", [ pytest.param( - "raise", marks=pytest.mark.xfail(reason="should raise error here") + "raise", marks=pytest_xfail(reason="should raise error here") ), - pytest.param("other", marks=pytest.mark.xfail(raises=ValueError)), + pytest.param("other", marks=pytest_xfail(raises=ValueError)), "ignore", ], ) @@ -4734,6 +4759,7 @@ def test_df_constructor_dtype(dtype): assert_eq(expect, got) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -5188,7 +5214,7 @@ def test_cov(): assert_eq(pdf.cov(), gdf.cov()) -@pytest.mark.xfail(reason="cupy-based cov does not support nulls") +@pytest_xfail(reason="cupy-based cov does not support nulls") def test_cov_nans(): pdf = pd.DataFrame() pdf["a"] = [None, None, None, 2.00758632, None] @@ -5200,6 +5226,7 @@ def test_cov_nans(): assert_eq(pdf.cov(), gdf.cov()) +@pytest_unmark_spilling @pytest.mark.parametrize( "gsr", [ @@ -5210,7 +5237,7 @@ def test_cov_nans(): cudf.Series([4, 2, 3], index=cudf.core.index.RangeIndex(0, 3)), pytest.param( cudf.Series([4, 2, 3, 4, 5], index=["a", "b", "d", "0", "12"]), - marks=pytest.mark.xfail, + marks=pytest_xfail, ), ], ) @@ -5252,6 +5279,7 @@ def test_df_sr_binop(gsr, colnames, op): assert_eq(expect, got, check_dtype=False) +@pytest_unmark_spilling @pytest.mark.parametrize( "op", [ @@ -5263,12 +5291,12 @@ def test_df_sr_binop(gsr, colnames, op): operator.pow, # comparison ops will temporarily XFAIL # see PR https://github.com/rapidsai/cudf/pull/7491 - pytest.param(operator.eq, marks=pytest.mark.xfail()), - pytest.param(operator.lt, marks=pytest.mark.xfail()), - pytest.param(operator.le, marks=pytest.mark.xfail()), - pytest.param(operator.gt, marks=pytest.mark.xfail()), - pytest.param(operator.ge, marks=pytest.mark.xfail()), - pytest.param(operator.ne, marks=pytest.mark.xfail()), + pytest.param(operator.eq, marks=pytest_xfail()), + pytest.param(operator.lt, marks=pytest_xfail()), + pytest.param(operator.le, marks=pytest_xfail()), + pytest.param(operator.gt, marks=pytest_xfail()), + pytest.param(operator.ge, marks=pytest_xfail()), + pytest.param(operator.ne, marks=pytest_xfail()), ], ) @pytest.mark.parametrize( @@ -5330,7 +5358,7 @@ def test_memory_usage(deep, index, set_index): ) -@pytest.mark.xfail +@pytest_xfail def test_memory_usage_string(): rows = int(100) df = pd.DataFrame( @@ -6249,6 +6277,7 @@ def test_dataframe_init_from_arrays_cols(data, cols, index): assert_eq(pdf, gdf, check_dtype=False) +@pytest_unmark_spilling @pytest.mark.parametrize( "col_data", [ @@ -6292,6 +6321,7 @@ def test_dataframe_assign_scalar(col_data, assign_val): assert_eq(pdf, gdf) +@pytest_unmark_spilling @pytest.mark.parametrize( "col_data", [ @@ -6587,6 +6617,7 @@ def test_dataframe_info_null_counts(): assert str_cmp == actual_string +@pytest_unmark_spilling @pytest.mark.parametrize( "data1", [ @@ -7064,6 +7095,7 @@ def test_series_keys(ps): assert_eq(ps.keys(), gds.keys()) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7144,6 +7176,7 @@ def test_dataframe_append_dataframe(df, other, sort, ignore_index): assert_eq(expected, actual, check_index_type=not gdf.empty) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7174,7 +7207,7 @@ def test_dataframe_append_dataframe(df, other, sort, ignore_index): pd.Series([10, 11, 23, 234, 13]), pytest.param( pd.Series([10, 11, 23, 234, 13], index=[11, 12, 13, 44, 33]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="pandas bug: " "https://github.com/pandas-dev/pandas/issues/35092" ), @@ -7227,6 +7260,7 @@ def test_dataframe_append_series_mixed_index(): df.append(sr, ignore_index=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7396,6 +7430,7 @@ def test_dataframe_ffill(df): assert_eq(expected, actual) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -7746,6 +7781,7 @@ def test_dataframe_init_with_columns(data, columns): ) +@pytest_unmark_spilling @pytest.mark.parametrize( "data, ignore_dtype", [ @@ -7825,6 +7861,7 @@ def test_dataframe_init_from_series_list(data, ignore_dtype, columns): assert_eq(expected, actual, check_index_type=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "data, ignore_dtype, index", [ @@ -7995,6 +8032,7 @@ def test_dataframe_iterrows_itertuples(): df.iterrows() +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -8027,7 +8065,7 @@ def test_dataframe_iterrows_itertuples(): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8048,7 +8086,7 @@ def test_dataframe_iterrows_itertuples(): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8072,6 +8110,7 @@ def test_describe_misc_include(df, include): assert_eq(expected, actual) +@pytest_unmark_spilling @pytest.mark.parametrize( "df", [ @@ -8104,7 +8143,7 @@ def test_describe_misc_include(df, include): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8125,7 +8164,7 @@ def test_describe_misc_include(df, include): ), } ), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="https://github.com/rapidsai/cudf/issues/6219" ), ), @@ -8554,6 +8593,7 @@ def test_dataframe_constructor_column_index_only(): ) == id(gdf["c"]._column) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -8655,6 +8695,7 @@ def test_agg_for_dataframe_with_string_columns(aggs): gdf.agg(aggs) +@pytest_unmark_spilling @pytest.mark.parametrize( "join", ["left"], @@ -8882,7 +8923,7 @@ def test_rename_for_level_RangeIndex_dataframe(): assert_eq(expect, got) -@pytest.mark.xfail(reason="level=None not implemented yet") +@pytest_xfail(reason="level=None not implemented yet") def test_rename_for_level_is_None_MC(): gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) @@ -9315,7 +9356,7 @@ def test_groupby_cov_positive_semidefinite_matrix(): ) -@pytest.mark.xfail +@pytest_xfail def test_groupby_cov_for_pandas_bug_case(): # Handles case: pandas bug using ddof with missing data. # Filed an issue in Pandas on GH, link below: @@ -9469,6 +9510,7 @@ def test_dataframe_rename_duplicate_column(): gdf.rename(columns={"a": "b"}, inplace=True) +@pytest_unmark_spilling @pytest.mark.parametrize( "data", [ @@ -9711,14 +9753,14 @@ def test_multiindex_wildcard_selection_all(wildcard_df): assert_eq(expect, got) -@pytest.mark.xfail(reason="Not yet properly supported.") +@pytest_xfail(reason="Not yet properly supported.") def test_multiindex_wildcard_selection_partial(wildcard_df): expect = wildcard_df.to_pandas().loc[:, (slice("a", "b"), "b")] got = wildcard_df.loc[:, (slice("a", "b"), "b")] assert_eq(expect, got) -@pytest.mark.xfail(reason="Not yet properly supported.") +@pytest_xfail(reason="Not yet properly supported.") def test_multiindex_wildcard_selection_three_level_all(): midx = cudf.MultiIndex.from_tuples( [(c1, c2, c3) for c1 in "abcd" for c2 in "abc" for c3 in "ab"] diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 280b619c305..6336565af52 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -9,6 +9,7 @@ import cudf from cudf import melt as cudf_melt from cudf.core._compat import PANDAS_GE_120 +from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, @@ -16,8 +17,18 @@ assert_eq, ) +pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling +# If spilling is enabled globally, we skip many test permutations +# to reduce running time. +if get_global_manager() is not None: + ALL_TYPES = ["float32"] # noqa: F811 + DATETIME_TYPES = ["datetime64[ms]"] # noqa: F811 + NUMERIC_TYPES = ["float32"] # noqa: F811 + # To save time, we skip tests marked "pytest.mark.xfail" + pytest_xfail = pytest.mark.skipif + @pytest.mark.parametrize("num_id_vars", [0, 1, 2]) @pytest.mark.parametrize("num_value_vars", [0, 1, 2]) @@ -80,7 +91,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): + [ pytest.param( "str", - marks=pytest.mark.xfail( + marks=pytest_xfail( condition=not PANDAS_GE_120, reason="pandas bug" ), ) @@ -443,7 +454,7 @@ def test_pivot_values(values): 0, pytest.param( 1, - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), @@ -451,7 +462,7 @@ def test_pivot_values(values): "foo", pytest.param( "bar", - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), @@ -459,24 +470,24 @@ def test_pivot_values(values): [], pytest.param( [0, 1], - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), ["foo"], pytest.param( ["foo", "bar"], - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ), pytest.param( [0, 1, 2], - marks=pytest.mark.xfail(reason="Pandas behaviour unclear"), + marks=pytest_xfail(reason="Pandas behaviour unclear"), ), pytest.param( ["foo", "bar", "baz"], - marks=pytest.mark.xfail(reason="Pandas behaviour unclear"), + marks=pytest_xfail(reason="Pandas behaviour unclear"), ), ], ) @@ -508,7 +519,7 @@ def test_unstack_multiindex(level): pd.Index(range(0, 5), name="row_index"), pytest.param( pd.CategoricalIndex(["d", "e", "f", "g", "h"]), - marks=pytest.mark.xfail( + marks=pytest_xfail( reason="Categorical column indexes not supported" ), ),