From d4d42b4b1f7470ca25b137ee87dc5898999db08c Mon Sep 17 00:00:00 2001 From: Anatoly Myachev <45976948+anmyachev@users.noreply.github.com> Date: Tue, 8 Sep 2020 13:14:26 +0300 Subject: [PATCH] TEST-#2033: speed up test_series.py (#2034) * TEST-#2033: speed up test_series.py Signed-off-by: Anatoly Myachev * TEST-#2033: fix kurtosis test Signed-off-by: Anatoly Myachev * TEST-#2033: refactor test for #1953 Signed-off-by: Anatoly Myachev --- modin/pandas/test/test_series.py | 168 +++++++++++-------------------- 1 file changed, 59 insertions(+), 109 deletions(-) diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index 13bb7450711..d21267c8242 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -61,6 +61,8 @@ test_data_small_keys, test_data_categorical_values, test_data_categorical_keys, + generate_multiindex, + test_data_diff_dtype, ) pd.DEFAULT_NPARTITIONS = 4 @@ -1850,10 +1852,22 @@ def test_keys(data): df_equals(modin_series.keys(), pandas_series.keys()) -@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) -@pytest.mark.parametrize("axis", axis_values, ids=axis_keys) +def test_kurtosis_alias(): + # This is an optimization. If it fails, Series.kurt must be tested explicitly + # in `test_kurtosis`, `test_kurtosis_numeric_only` and `test_kurtosis_level`. 
+ assert pd.Series.kurt == pd.Series.kurtosis + + +@pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("skipna", bool_arg_values, ids=bool_arg_keys) -@pytest.mark.parametrize("level", [None, -1, 0, 1]) +def test_kurtosis(axis, skipna): + eval_general( + *create_test_series(test_data["float_nan_data"]), + lambda df: df.kurtosis(axis=axis, skipna=skipna), + ) + + +@pytest.mark.parametrize("axis", ["rows", "columns"]) @pytest.mark.parametrize( "numeric_only", [ @@ -1867,20 +1881,26 @@ def test_keys(data): None, ], ) -@pytest.mark.parametrize("method", ["kurtosis", "kurt"]) -def test_kurt_kurtosis(data, axis, skipna, level, numeric_only, method): - func_kwargs = { - "axis": axis, - "skipna": skipna, - "level": level, - "numeric_only": numeric_only, - } - modin_series, pandas_series = create_test_series(data) +def test_kurtosis_numeric_only(axis, numeric_only): + eval_general( + *create_test_series(test_data_diff_dtype), + lambda df: df.kurtosis(axis=axis, numeric_only=numeric_only), + ) + + +@pytest.mark.parametrize("level", [-1, 0, 1]) +def test_kurtosis_level(level): + data = test_data["int_data"] + modin_s, pandas_s = create_test_series(data) + + index = generate_multiindex(len(data.keys())) + modin_s.columns = index + pandas_s.columns = index eval_general( - modin_series, - pandas_series, - lambda df: df.kurtosis(**func_kwargs), + modin_s, + pandas_s, + lambda s: s.kurtosis(axis=1, level=level), ) @@ -2015,16 +2035,18 @@ def test_median(data, skipna): modin_series, pandas_series = create_test_series(data) df_equals(modin_series.median(skipna=skipna), pandas_series.median(skipna=skipna)) - # test for issue #1953 + +@pytest.mark.parametrize("method", ["median", "skew", "std", "sum", "var", "prod"]) +def test_median_skew_std_sum_var_prod_1953(method): + # See #1953 for details + data = [3, 3, 3, 3, 3, 3, 3, 3, 3] arrays = [ ["1", "1", "1", "2", "2", "2", "3", "3", "3"], ["1", "2", "3", "4", "5", "6", "7", "8", "9"], ] - modin_series = pd.Series([3, 3, 3, 
3, 3, 3, 3, 3, 3], index=arrays) - pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - modin_result = modin_series.median(level=0) - pandas_result = pandas_series.median(level=0) - df_equals(modin_result, pandas_result) + modin_s = pd.Series(data, index=arrays) + pandas_s = pandas.Series(data, index=arrays) + eval_general(modin_s, pandas_s, lambda s: getattr(s, method)(level=0)) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @@ -2206,66 +2228,38 @@ def test_pow(data): inter_df_math_helper(modin_series, pandas_series, "pow") -@pytest.mark.parametrize( - "data", - test_data_values + test_data_small_values, - ids=test_data_keys + test_data_small_keys, -) -@pytest.mark.parametrize("axis", axis_values, ids=axis_keys) +def test_product_alias(): + assert pd.Series.prod == pd.Series.product + + +@pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys) ) +def test_prod(axis, skipna): + eval_general( + *create_test_series(test_data["float_nan_data"]), + lambda s: s.prod(axis=axis, skipna=skipna), + ) + + @pytest.mark.parametrize( "numeric_only", [ None, False, - pytest.param( - True, - marks=pytest.mark.xfail( - reason="numeric_only not implemented for pandas.Series" - ), - ), + pytest.param(True, marks=pytest.mark.xfail(reason="didn't raise Exception")), ], ) @pytest.mark.parametrize( "min_count", int_arg_values, ids=arg_keys("min_count", int_arg_keys) ) -@pytest.mark.parametrize( - "operation", - [ - "prod", - pytest.param( - "product", - marks=pytest.mark.skipif( - pandas.Series.product == pandas.Series.prod - and pd.Series.product == pd.Series.prod, - reason="That operation was already tested.", - ), - ), - ], -) -def test_prod(data, axis, skipna, numeric_only, min_count, operation): +def test_prod_specific(min_count, numeric_only): eval_general( - *create_test_series(data), - lambda df, *args, **kwargs: type(df)([getattr(df, operation)(*args, 
**kwargs)]), - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - min_count=min_count, + *create_test_series(test_data_diff_dtype), + lambda df: df.prod(min_count=min_count, numeric_only=numeric_only), ) - # test for issue #1953 - arrays = [ - ["1", "1", "1", "2", "2", "2", "3", "3", "3"], - ["1", "2", "3", "4", "5", "6", "7", "8", "9"], - ] - modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - modin_result = modin_series.prod(level=0) - pandas_result = pandas_series.prod(level=0) - df_equals(modin_result, pandas_result) - @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("q", quantiles_values, ids=quantiles_keys) @@ -2784,17 +2778,6 @@ def test_skew(data, skipna): modin_series, pandas_series = create_test_series(data) df_equals(modin_series.skew(skipna=skipna), pandas_series.skew(skipna=skipna)) - # test for issue #1953 - arrays = [ - ["1", "1", "1", "2", "2", "2", "3", "3", "3"], - ["1", "2", "3", "4", "5", "6", "7", "8", "9"], - ] - modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - modin_result = modin_series.skew(level=0) - pandas_result = pandas_series.skew(level=0) - df_equals(modin_result, pandas_result) - @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("index", ["default", "ndarray"]) @@ -2913,17 +2896,6 @@ def test_std(request, data, skipna, ddof): modin_result = modin_series.std(skipna=skipna, ddof=ddof) df_equals(modin_result, pandas_result) - # test for issue #1953 - arrays = [ - ["1", "1", "1", "2", "2", "2", "3", "3", "3"], - ["1", "2", "3", "4", "5", "6", "7", "8", "9"], - ] - modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - modin_result = modin_series.std(level=0) - 
pandas_result = pandas_series.std(level=0) - df_equals(modin_result, pandas_result) - @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_sub(data): @@ -2972,17 +2944,6 @@ def test_sum(data, axis, skipna, numeric_only, min_count): min_count=min_count, ) - # test for issue #1953 - arrays = [ - ["1", "1", "1", "2", "2", "2", "3", "3", "3"], - ["1", "2", "3", "4", "5", "6", "7", "8", "9"], - ] - modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - modin_result = modin_series.sum(level=0) - pandas_result = pandas_series.sum(level=0) - df_equals(modin_result, pandas_result) - @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("axis1", [0, 1, "columns", "index"]) @@ -3361,17 +3322,6 @@ def test_var(data, skipna, ddof): modin_result = modin_series.var(skipna=skipna, ddof=ddof) df_equals(modin_result, pandas_result) - # test for issue #1953 - arrays = [ - ["1", "1", "1", "2", "2", "2", "3", "3", "3"], - ["1", "2", "3", "4", "5", "6", "7", "8", "9"], - ] - modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays) - modin_result = modin_series.var(level=0) - pandas_result = pandas_series.var(level=0) - df_equals(modin_result, pandas_result) - def test_view(): modin_series = pd.Series([-2, -1, 0, 1, 2], dtype="int8")