Skip to content

Commit

Permalink
TEST-modin-project#2033: speed up test_series.py (modin-project#2034)
Browse files Browse the repository at this point in the history
* TEST-modin-project#2033: speed up test_series.py

Signed-off-by: Anatoly Myachev <[email protected]>

* TEST-modin-project#2033: fix kurtosis test

Signed-off-by: Anatoly Myachev <[email protected]>

* TEST-modin-project#2033: refactor test for modin-project#1953

Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev authored and aregm committed Sep 16, 2020
1 parent 4f88ac5 commit d4d42b4
Showing 1 changed file with 59 additions and 109 deletions.
168 changes: 59 additions & 109 deletions modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
test_data_small_keys,
test_data_categorical_values,
test_data_categorical_keys,
generate_multiindex,
test_data_diff_dtype,
)

pd.DEFAULT_NPARTITIONS = 4
Expand Down Expand Up @@ -1850,10 +1852,22 @@ def test_keys(data):
df_equals(modin_series.keys(), pandas_series.keys())


def test_kurtosis_alias():
    """Check that ``Series.kurt`` and ``Series.kurtosis`` are the same function.

    This is an optimization: the other kurtosis tests in this file only call
    ``kurtosis``.  If this assertion ever fails, ``Series.kurt`` has to be
    tested explicitly in `test_kurtosis`, `test_kurtosis_numeric_only` and
    `test_kurtosis_level`.
    """
    # The test takes no arguments, so it must not carry any
    # ``parametrize`` decorators (pytest rejects unknown argument names).
    assert pd.Series.kurt == pd.Series.kurtosis


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", bool_arg_values, ids=bool_arg_keys)
def test_kurtosis(axis, skipna):
    """Run ``kurtosis(axis=..., skipna=...)`` on a Modin and a pandas series.

    Both series are built from ``test_data["float_nan_data"]`` and handed to
    ``eval_general`` together with the operation to apply to each of them.
    Only ``axis`` and ``skipna`` are parametrized here; ``level`` is covered
    by `test_kurtosis_level`.
    """
    eval_general(
        *create_test_series(test_data["float_nan_data"]),
        lambda df: df.kurtosis(axis=axis, skipna=skipna),
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize(
"numeric_only",
[
Expand All @@ -1867,20 +1881,26 @@ def test_keys(data):
None,
],
)
@pytest.mark.parametrize("method", ["kurtosis", "kurt"])
def test_kurt_kurtosis(data, axis, skipna, level, numeric_only, method):
func_kwargs = {
"axis": axis,
"skipna": skipna,
"level": level,
"numeric_only": numeric_only,
}
modin_series, pandas_series = create_test_series(data)
def test_kurtosis_numeric_only(axis, numeric_only):
eval_general(
*create_test_series(test_data_diff_dtype),
lambda df: df.kurtosis(axis=axis, numeric_only=numeric_only),
)


@pytest.mark.parametrize("level", [-1, 0, 1])
def test_kurtosis_level(level):
    """Run ``kurtosis(axis=1, level=level)`` on a Modin and a pandas series.

    Both series are built from ``test_data["int_data"]`` and receive the same
    generated multi-index before being passed to ``eval_general``.
    """
    data = test_data["int_data"]
    modin_s, pandas_s = create_test_series(data)

    index = generate_multiindex(len(data.keys()))
    # NOTE(review): the multi-index is assigned to ``columns`` (not ``index``)
    # of the series, and the operation uses ``axis=1`` -- confirm this is the
    # intended setup for a Series rather than a DataFrame.
    modin_s.columns = index
    pandas_s.columns = index

    # Only the locally built series and the ``level`` parameter are used;
    # no other names are in scope in this function.
    eval_general(
        modin_s,
        pandas_s,
        lambda s: s.kurtosis(axis=1, level=level),
    )


Expand Down Expand Up @@ -2015,16 +2035,18 @@ def test_median(data, skipna):
modin_series, pandas_series = create_test_series(data)
df_equals(modin_series.median(skipna=skipna), pandas_series.median(skipna=skipna))

# test for issue #1953

@pytest.mark.parametrize("method", ["median", "skew", "std", "sum", "var", "prod"])
def test_median_skew_std_sum_var_prod_1953(method):
    """Run each reduction in ``method`` with ``level=0`` on a two-level index.

    See #1953 for details.  The same constant data and index arrays are used
    to build a Modin and a pandas series; ``eval_general`` receives both and
    the reduction selected by ``method``.
    """
    data = [3, 3, 3, 3, 3, 3, 3, 3, 3]
    arrays = [
        ["1", "1", "1", "2", "2", "2", "3", "3", "3"],
        ["1", "2", "3", "4", "5", "6", "7", "8", "9"],
    ]
    modin_s = pd.Series(data, index=arrays)
    pandas_s = pandas.Series(data, index=arrays)
    # ``median`` is one of the parametrized methods, so no separate
    # hard-coded median comparison is needed here.
    eval_general(modin_s, pandas_s, lambda s: getattr(s, method)(level=0))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
Expand Down Expand Up @@ -2206,66 +2228,38 @@ def test_pow(data):
inter_df_math_helper(modin_series, pandas_series, "pow")


def test_product_alias():
    """Check that ``Series.prod`` and ``Series.product`` are the same function.

    The other tests in this file only call ``prod``; this assertion is what
    justifies not repeating them for ``product``.
    """
    # The test takes no arguments, so it must not carry any
    # ``parametrize`` decorators (pytest rejects unknown argument names).
    assert pd.Series.prod == pd.Series.product


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys)
)
def test_prod(axis, skipna):
    """Run ``prod(axis=..., skipna=...)`` on a Modin and a pandas series.

    Both series come from ``test_data["float_nan_data"]`` and are passed to
    ``eval_general`` together with the operation to apply.
    """
    modin_series, pandas_series = create_test_series(test_data["float_nan_data"])

    def apply_prod(series):
        return series.prod(axis=axis, skipna=skipna)

    eval_general(modin_series, pandas_series, apply_prod)


@pytest.mark.parametrize(
    "numeric_only",
    [
        None,
        False,
        pytest.param(True, marks=pytest.mark.xfail(reason="didn't raise Exception")),
    ],
)
@pytest.mark.parametrize(
    "min_count", int_arg_values, ids=arg_keys("min_count", int_arg_keys)
)
def test_prod_specific(min_count, numeric_only):
    """Run ``prod(min_count=..., numeric_only=...)`` on Modin and pandas series.

    Both series are built from ``test_data_diff_dtype`` and passed to
    ``eval_general`` with the operation.  ``numeric_only=True`` is marked as
    an expected failure.  ``axis``/``skipna`` are covered by `test_prod`, and
    the ``level=0`` case from issue #1953 by
    `test_median_skew_std_sum_var_prod_1953`.
    """
    eval_general(
        *create_test_series(test_data_diff_dtype),
        lambda df: df.prod(min_count=min_count, numeric_only=numeric_only),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("q", quantiles_values, ids=quantiles_keys)
Expand Down Expand Up @@ -2784,17 +2778,6 @@ def test_skew(data, skipna):
modin_series, pandas_series = create_test_series(data)
df_equals(modin_series.skew(skipna=skipna), pandas_series.skew(skipna=skipna))

# test for issue #1953
arrays = [
["1", "1", "1", "2", "2", "2", "3", "3", "3"],
["1", "2", "3", "4", "5", "6", "7", "8", "9"],
]
modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
modin_result = modin_series.skew(level=0)
pandas_result = pandas_series.skew(level=0)
df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("index", ["default", "ndarray"])
Expand Down Expand Up @@ -2913,17 +2896,6 @@ def test_std(request, data, skipna, ddof):
modin_result = modin_series.std(skipna=skipna, ddof=ddof)
df_equals(modin_result, pandas_result)

# test for issue #1953
arrays = [
["1", "1", "1", "2", "2", "2", "3", "3", "3"],
["1", "2", "3", "4", "5", "6", "7", "8", "9"],
]
modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
modin_result = modin_series.std(level=0)
pandas_result = pandas_series.std(level=0)
df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_sub(data):
Expand Down Expand Up @@ -2972,17 +2944,6 @@ def test_sum(data, axis, skipna, numeric_only, min_count):
min_count=min_count,
)

# test for issue #1953
arrays = [
["1", "1", "1", "2", "2", "2", "3", "3", "3"],
["1", "2", "3", "4", "5", "6", "7", "8", "9"],
]
modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
modin_result = modin_series.sum(level=0)
pandas_result = pandas_series.sum(level=0)
df_equals(modin_result, pandas_result)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("axis1", [0, 1, "columns", "index"])
Expand Down Expand Up @@ -3361,17 +3322,6 @@ def test_var(data, skipna, ddof):
modin_result = modin_series.var(skipna=skipna, ddof=ddof)
df_equals(modin_result, pandas_result)

# test for issue #1953
arrays = [
["1", "1", "1", "2", "2", "2", "3", "3", "3"],
["1", "2", "3", "4", "5", "6", "7", "8", "9"],
]
modin_series = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
pandas_series = pandas.Series([3, 3, 3, 3, 3, 3, 3, 3, 3], index=arrays)
modin_result = modin_series.var(level=0)
pandas_result = pandas_series.var(level=0)
df_equals(modin_result, pandas_result)


def test_view():
modin_series = pd.Series([-2, -1, 0, 1, 2], dtype="int8")
Expand Down

0 comments on commit d4d42b4

Please sign in to comment.