Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Sep 23, 2023
1 parent b1d6e2a commit 69382e7
Showing 1 changed file with 142 additions and 100 deletions.
242 changes: 142 additions & 100 deletions modin/pandas/test/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import datetime
import itertools
import warnings
from types import BuiltinFunctionType
from unittest import mock

import numpy as np
Expand Down Expand Up @@ -64,25 +62,30 @@
# of defaulting to pandas.
pytestmark = [
pytest.mark.filterwarnings(default_to_pandas_ignore_string),
# pytest.mark.filterwarnings("error::FutureWarning"),
pytest.mark.filterwarnings(
"ignore:DataFrameGroupBy.dtypes is deprecated and will be removed in a future version:FutureWarning"
"ignore:DataFrame.groupby with axis=1 is deprecated:FutureWarning"
),
# FIXME: these cases inconsistent between modin and pandas
pytest.mark.filterwarnings(
"ignore:DataFrame.groupby with axis=1 is deprecated:FutureWarning"
"ignore:DataFrameGroupBy.dtypes is deprecated:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:DataFrameGroupBy.shift with axis=1 is deprecated:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:(DataFrameGroupBy|SeriesGroupBy|DataFrame|Series).fillna with 'method' is deprecated:FutureWarning"
),
# FIXME: these cases inconsistent between modin and pandas
pytest.mark.filterwarnings(
"ignore:A grouping was used that is not in the columns of the DataFrame and so was excluded from the result:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:The default of observed=False is deprecated:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:.*DataFrame.idxmax with all-NA values, or any-NA and skipna=False, is deprecated.*:FutureWarning"
"ignore:DataFrame.idxmax with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:.*DataFrame.idxmin with all-NA values, or any-NA and skipna=False, is deprecated.*:FutureWarning"
"ignore:DataFrame.idxmin with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning"
Expand Down Expand Up @@ -261,7 +264,7 @@ def test_mixed_dtypes_groupby(as_index):
]
for func in agg_functions:
eval_agg(modin_groupby, pandas_groupby, func)
eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
eval_aggregate(modin_groupby, pandas_groupby, func)

eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
eval_max(modin_groupby, pandas_groupby)
Expand Down Expand Up @@ -302,12 +305,12 @@ def test_mixed_dtypes_groupby(as_index):
eval_pipe(modin_groupby, pandas_groupby, func)

# FIXME: ValueError: cannot join with no overlapping index names
# eval_general(
# modin_groupby,
# pandas_groupby,
# lambda df: df.corr(numeric_only=True),
# modin_df_almost_equals_pandas,
# )
eval_general(
modin_groupby,
pandas_groupby,
lambda df: df.corr(numeric_only=True),
modin_df_almost_equals_pandas,
)

eval_fillna(modin_groupby, pandas_groupby)
eval_count(modin_groupby, pandas_groupby)
Expand Down Expand Up @@ -711,7 +714,7 @@ def test_single_group_row_groupby():
]
for func in agg_functions:
eval_agg(modin_groupby, pandas_groupby, func)
eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
eval_aggregate(modin_groupby, pandas_groupby, func)

eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
eval_rank(modin_groupby, pandas_groupby)
Expand Down Expand Up @@ -847,7 +850,7 @@ def _callable(df):
]
for func in agg_functions:
eval_agg(modin_groupby, pandas_groupby, func)
eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
eval_aggregate(modin_groupby, pandas_groupby, func)

eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
eval_rank(modin_groupby, pandas_groupby)
Expand Down Expand Up @@ -906,14 +909,8 @@ def test_simple_col_groupby():

by = [1, 2, 3, 2, 1]

with pytest.warns(
FutureWarning, match="DataFrame.groupby with axis=1 is deprecated"
):
modin_groupby = modin_df.groupby(axis=1, by=by)
with pytest.warns(
FutureWarning, match="DataFrame.groupby with axis=1 is deprecated"
):
pandas_groupby = pandas_df.groupby(axis=1, by=by)
modin_groupby = modin_df.groupby(axis=1, by=by)
pandas_groupby = pandas_df.groupby(axis=1, by=by)

modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
eval_ngroups(modin_groupby, pandas_groupby)
Expand Down Expand Up @@ -1110,7 +1107,7 @@ def test_series_groupby(by, as_index_series_or_dataframe):
]
for func in agg_functions:
eval_agg(modin_groupby, pandas_groupby, func)
eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
eval_aggregate(modin_groupby, pandas_groupby, func)

eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
eval_rank(modin_groupby, pandas_groupby)
Expand Down Expand Up @@ -1240,11 +1237,7 @@ def eval_apply(modin_groupby, pandas_groupby, func):


def eval_dtypes(modin_groupby, pandas_groupby):
with pytest.warns(FutureWarning, match=".*DataFrameGroupBy.dtypes is deprecated.*"):
modin_dtypes = modin_groupby.dtypes
with pytest.warns(FutureWarning, match=".*DataFrameGroupBy.dtypes is deprecated.*"):
pandas_dtypes = pandas_groupby.dtypes
df_equals(modin_dtypes, pandas_dtypes)
df_equals(modin_groupby.dtypes, pandas_groupby.dtypes)


def eval_prod(modin_groupby, pandas_groupby, numeric_only=False):
Expand All @@ -1261,25 +1254,12 @@ def eval_std(modin_groupby, pandas_groupby, numeric_only=False):
)


def eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="agg"):
def will_be_replaced(func, obj):
return (
callable(func)
and isinstance(func, BuiltinFunctionType)
and func.__name__ in dir(obj)
)
def eval_aggregate(modin_groupby, pandas_groupby, func):
df_equals(modin_groupby.aggregate(func), pandas_groupby.aggregate(func))

with pytest.warns(
FutureWarning,
match=".*In a future version of pandas, the provided callable will be used directly.*",
) if will_be_replaced(func, modin_groupby) else contextlib.nullcontext():
modin_res = getattr(modin_groupby, agg_or_aggregate)(func)
with pytest.warns(
FutureWarning,
match=".*In a future version of pandas, the provided callable will be used directly.*",
) if will_be_replaced(func, pandas_groupby) else contextlib.nullcontext():
pandas_res = getattr(modin_groupby, agg_or_aggregate)(func)
df_equals(modin_res, pandas_res)

def eval_agg(modin_groupby, pandas_groupby, func):
df_equals(modin_groupby.agg(func), pandas_groupby.agg(func))


def eval_rank(modin_groupby, pandas_groupby):
Expand Down Expand Up @@ -1349,18 +1329,12 @@ def eval_transform(modin_groupby, pandas_groupby, func):
)


def eval_fillna(modin_groupby, pandas_groupby, catch_warns=True):
with pytest.warns(
FutureWarning,
match=r".*(DataFrameGroupBy|SeriesGroupBy|Series).fillna with 'method' is deprecated.*",
) if catch_warns else contextlib.nullcontext():
modin_res = modin_groupby.fillna(method="ffill")
with pytest.warns(
FutureWarning,
match=".*(DataFrameGroupBy|SeriesGroupBy|Series).fillna with 'method' is deprecated.*",
) if catch_warns else contextlib.nullcontext():
pandas_res = pandas_groupby.fillna(method="ffill")
df_equals(*sort_index_if_experimental_groupby(modin_res, pandas_res))
def eval_fillna(modin_groupby, pandas_groupby):
df_equals(
*sort_index_if_experimental_groupby(
modin_groupby.fillna(method="ffill"), pandas_groupby.fillna(method="ffill")
)
)


def eval_count(modin_groupby, pandas_groupby):
Expand Down Expand Up @@ -1465,7 +1439,7 @@ def eval_groups(modin_groupby, pandas_groupby):
df_equals(modin_groupby.get_group(name), pandas_groupby.get_group(name))


def eval_shift(modin_groupby, pandas_groupby, catch_warns=True):
def eval_shift(modin_groupby, pandas_groupby):
def comparator(df1, df2):
df_equals(*sort_index_if_experimental_groupby(df1, df2))

Expand Down Expand Up @@ -1494,16 +1468,8 @@ def comparator(df1, df2):
# https://github.com/modin-project/modin/issues/3701
if get_current_execution() != "BaseOnPython":
if isinstance(pandas_groupby, pandas.core.groupby.DataFrameGroupBy):
with pytest.warns(
FutureWarning,
match=".*DataFrameGroupBy.shift with axis=1 is deprecated.*",
) if catch_warns else contextlib.nullcontext():
pandas_res = pandas_groupby.shift(axis=1, fill_value=777)
with pytest.warns(
FutureWarning,
match=".*DataFrameGroupBy.shift with axis=1 is deprecated.*",
) if catch_warns else contextlib.nullcontext():
modin_res = modin_groupby.shift(axis=1, fill_value=777)
pandas_res = pandas_groupby.shift(axis=1, fill_value=777)
modin_res = modin_groupby.shift(axis=1, fill_value=777)
# Pandas produces unexpected index order (pandas GH 44269).
# Here we align index of Modin result with pandas to make test passed.
import pandas.core.algorithms as algorithms
Expand Down Expand Up @@ -1704,17 +1670,10 @@ def test_shift_freq(groupby_axis, shift_axis, groupby_sort):
pandas_groupby = pandas_df.groupby(by=_by, axis=groupby_axis, sort=groupby_sort)
modin_groupby = modin_df.groupby(by=_by, axis=groupby_axis, sort=groupby_sort)

def _callable(groupby):
with pytest.warns(
FutureWarning,
match=".*DataFrameGroupBy.shift with axis=1 is deprecated.*",
) if shift_axis == 1 else contextlib.nullcontext():
return groupby.shift(axis=shift_axis, freq="S")

eval_general(
modin_groupby,
pandas_groupby,
_callable,
lambda groupby: groupby.shift(axis=shift_axis, freq="S"),
)


Expand Down Expand Up @@ -1849,9 +1808,7 @@ def col3(x):
[
"quantile",
"mean",
pytest.param(
"sum", marks=pytest.mark.skip("See Modin issue #2255 for details")
),
"sum",
"median",
"unique",
"cumprod",
Expand Down Expand Up @@ -2588,7 +2545,7 @@ def run_test(eval_function, *args, **kwargs):
run_test(eval___getattr__, item="b")
run_test(eval___getitem__, item="b")
run_test(eval_agg, func=lambda df: df.mean())
run_test(eval_agg, func=lambda df: df.mean(), agg_or_aggregate="aggregate")
run_test(eval_aggregate, func=lambda df: df.mean())
run_test(eval_any)
run_test(eval_apply, func=lambda df: df.mean())
run_test(eval_count)
Expand All @@ -2597,14 +2554,7 @@ def run_test(eval_function, *args, **kwargs):
run_test(eval_cumprod, numeric_only=True)
run_test(eval_cumsum, numeric_only=True)
run_test(eval_dtypes)
# FIXME: inconsistent behavior when showing warnings
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"DataFrameGroupBy.fillna with 'method' is deprecated",
category=FutureWarning,
)
run_test(eval_fillna, catch_warns=False)
run_test(eval_fillna)
run_test(eval_groups)
run_test(eval_len)
run_test(eval_max)
Expand All @@ -2630,14 +2580,7 @@ def run_test(eval_function, *args, **kwargs):
# https://github.com/modin-project/modin/issues/5505
# https://github.com/modin-project/modin/issues/5506
run_test(eval_pipe, func=lambda df: df.mean())
# FIXME: inconsistent behavior when showing warnings
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"DataFrameGroupBy.shift with axis=1 is deprecated",
category=FutureWarning,
)
run_test(eval_shift, catch_warns=False)
run_test(eval_shift)

# TODO: these functions fail in case of empty data in the pandas itself,
# we have to modify the `eval_*` functions to be able to check for
Expand Down Expand Up @@ -2996,3 +2939,102 @@ def test_reshuffling_groupby_on_strings(modify_config):
eval_general(
modin_df.groupby("col1"), pandas_df.groupby("col1"), lambda grp: grp.mean()
)


### TEST GROUPBY WARNINGS ###


def test_groupby_axis_1_warning():
    """Check that ``groupby(axis=1)`` emits the deprecation FutureWarning.

    The same call is made on the Modin frame first and then on the pandas
    frame; each call must raise the warning on its own.
    """
    frames = create_test_dfs({"col1": [0, 3, 2, 3], "col2": [4, 1, 6, 7]})
    modin_df, pandas_df = frames
    expected_message = "DataFrame.groupby with axis=1 is deprecated"

    for frame in (modin_df, pandas_df):
        with pytest.warns(FutureWarning, match=expected_message):
            frame.groupby(by="col1", axis=1)


def test_groupby_dtypes_warning():
    """Check that reading ``.dtypes`` on a groupby emits a FutureWarning.

    The attribute is accessed on the Modin groupby first and then on the
    pandas groupby; each access must raise the warning on its own.
    """
    modin_df, pandas_df = create_test_dfs(
        {"col1": [0, 3, 2, 3], "col2": [4, 1, 6, 7]}
    )
    expected_message = "DataFrameGroupBy.dtypes is deprecated"

    groupbys = (modin_df.groupby(by="col1"), pandas_df.groupby(by="col1"))
    for grouped in groupbys:
        with pytest.warns(FutureWarning, match=expected_message):
            # Only the attribute access matters; its value is discarded.
            grouped.dtypes


def test_groupby_shift_axis_1_warning():
    """Check that ``groupby.shift(axis=1)`` emits the deprecation FutureWarning.

    The pandas groupby is exercised first and the Modin groupby second;
    each call must raise the warning on its own.
    """
    modin_df, pandas_df = create_test_dfs(
        {"col1": [0, 3, 2, 3], "col2": [4, 1, 6, 7]}
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = "DataFrameGroupBy.shift with axis=1 is deprecated"

    for grouped in (pandas_groupby, modin_groupby):
        with pytest.warns(FutureWarning, match=expected_message):
            grouped.shift(axis=1, fill_value=777)


def test_groupby_fillna_axis_1_warning():
    """Check that ``groupby.fillna(method=...)`` emits a FutureWarning.

    Despite the ``axis_1`` in the test name, the call below passes only
    ``method="ffill"``; the warning matched is the one about ``method``.
    The Modin groupby is exercised first and the pandas groupby second.
    """
    modin_df, pandas_df = create_test_dfs(
        {"col1": [0, 3, 2, 3], "col2": [4, None, 6, None]}
    )
    expected_message = "DataFrameGroupBy.fillna with 'method' is deprecated"

    groupbys = (modin_df.groupby(by="col1"), pandas_df.groupby(by="col1"))
    for grouped in groupbys:
        with pytest.warns(FutureWarning, match=expected_message):
            grouped.fillna(method="ffill")


def test_groupby_agg_provided_callable_warning():
    """Check that ``agg`` with the builtins ``sum``/``max`` emits a FutureWarning.

    For each builtin, the Modin groupby is exercised first and the pandas
    groupby second; every individual ``agg`` call must raise the warning.
    """
    modin_df, pandas_df = create_test_dfs(
        {"col1": [0, 3, 2, 3], "col2": [4, 1, 6, 7]}
    )
    modin_groupby = modin_df.groupby(by="col1")
    pandas_groupby = pandas_df.groupby(by="col1")
    expected_message = (
        "In a future version of pandas, the provided callable will be used directly"
    )

    for func in (sum, max):
        for grouped in (modin_groupby, pandas_groupby):
            with pytest.warns(FutureWarning, match=expected_message):
                grouped.agg(func)

0 comments on commit 69382e7

Please sign in to comment.