cleanup

Signed-off-by: Anatoly Myachev <[email protected]>
modin-project · Sep 23, 2023 · 69382e7 · 69382e7
1 parent b1d6e2a
commit 69382e7
Showing 1 changed file with 142 additions and 100 deletions.
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
@@ -11,11 +11,9 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
-import contextlib
 import datetime
 import itertools
 import warnings
-from types import BuiltinFunctionType
 from unittest import mock
 
 import numpy as np
@@ -64,25 +62,30 @@
 # of defaulting to pandas.
 pytestmark = [
     pytest.mark.filterwarnings(default_to_pandas_ignore_string),
-    # pytest.mark.filterwarnings("error::FutureWarning"),
     pytest.mark.filterwarnings(
-        "ignore:DataFrameGroupBy.dtypes is deprecated and will be removed in a future version:FutureWarning"
+        "ignore:DataFrame.groupby with axis=1 is deprecated:FutureWarning"
     ),
-    # FIXME: these cases inconsistent between modin and pandas
     pytest.mark.filterwarnings(
-        "ignore:DataFrame.groupby with axis=1 is deprecated:FutureWarning"
+        "ignore:DataFrameGroupBy.dtypes is deprecated:FutureWarning"
+    ),
+    pytest.mark.filterwarnings(
+        "ignore:DataFrameGroupBy.shift with axis=1 is deprecated:FutureWarning"
+    ),
+    pytest.mark.filterwarnings(
+        "ignore:(DataFrameGroupBy|SeriesGroupBy|DataFrame|Series).fillna with 'method' is deprecated:FutureWarning"
     ),
+    # FIXME: these cases inconsistent between modin and pandas
     pytest.mark.filterwarnings(
         "ignore:A grouping was used that is not in the columns of the DataFrame and so was excluded from the result:FutureWarning"
     ),
     pytest.mark.filterwarnings(
         "ignore:The default of observed=False is deprecated:FutureWarning"
     ),
     pytest.mark.filterwarnings(
-        "ignore:.*DataFrame.idxmax with all-NA values, or any-NA and skipna=False, is deprecated.*:FutureWarning"
+        "ignore:DataFrame.idxmax with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning"
     ),
     pytest.mark.filterwarnings(
-        "ignore:.*DataFrame.idxmin with all-NA values, or any-NA and skipna=False, is deprecated.*:FutureWarning"
+        "ignore:DataFrame.idxmin with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning"
     ),
     pytest.mark.filterwarnings(
         "ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning"
@@ -261,7 +264,7 @@ def test_mixed_dtypes_groupby(as_index):
         ]
         for func in agg_functions:
             eval_agg(modin_groupby, pandas_groupby, func)
-            eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
+            eval_aggregate(modin_groupby, pandas_groupby, func)
 
         eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
         eval_max(modin_groupby, pandas_groupby)
@@ -302,12 +305,12 @@ def test_mixed_dtypes_groupby(as_index):
             eval_pipe(modin_groupby, pandas_groupby, func)
 
         # FIXME:  ValueError: cannot join with no overlapping index names
-        # eval_general(
-        #    modin_groupby,
-        #    pandas_groupby,
-        #    lambda df: df.corr(numeric_only=True),
-        #    modin_df_almost_equals_pandas,
-        # )
+        eval_general(
+            modin_groupby,
+            pandas_groupby,
+            lambda df: df.corr(numeric_only=True),
+            modin_df_almost_equals_pandas,
+        )
 
         eval_fillna(modin_groupby, pandas_groupby)
         eval_count(modin_groupby, pandas_groupby)
@@ -711,7 +714,7 @@ def test_single_group_row_groupby():
     ]
     for func in agg_functions:
         eval_agg(modin_groupby, pandas_groupby, func)
-        eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
+        eval_aggregate(modin_groupby, pandas_groupby, func)
 
     eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
     eval_rank(modin_groupby, pandas_groupby)
@@ -847,7 +850,7 @@ def _callable(df):
     ]
     for func in agg_functions:
         eval_agg(modin_groupby, pandas_groupby, func)
-        eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
+        eval_aggregate(modin_groupby, pandas_groupby, func)
 
     eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
     eval_rank(modin_groupby, pandas_groupby)
@@ -906,14 +909,8 @@ def test_simple_col_groupby():
 
     by = [1, 2, 3, 2, 1]
 
-    with pytest.warns(
-        FutureWarning, match="DataFrame.groupby with axis=1 is deprecated"
-    ):
-        modin_groupby = modin_df.groupby(axis=1, by=by)
-    with pytest.warns(
-        FutureWarning, match="DataFrame.groupby with axis=1 is deprecated"
-    ):
-        pandas_groupby = pandas_df.groupby(axis=1, by=by)
+    modin_groupby = modin_df.groupby(axis=1, by=by)
+    pandas_groupby = pandas_df.groupby(axis=1, by=by)
 
     modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
     eval_ngroups(modin_groupby, pandas_groupby)
@@ -1110,7 +1107,7 @@ def test_series_groupby(by, as_index_series_or_dataframe):
         ]
         for func in agg_functions:
             eval_agg(modin_groupby, pandas_groupby, func)
-            eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="aggregate")
+            eval_aggregate(modin_groupby, pandas_groupby, func)
 
         eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
         eval_rank(modin_groupby, pandas_groupby)
@@ -1240,11 +1237,7 @@ def eval_apply(modin_groupby, pandas_groupby, func):
 
 
 def eval_dtypes(modin_groupby, pandas_groupby):
-    with pytest.warns(FutureWarning, match=".*DataFrameGroupBy.dtypes is deprecated.*"):
-        modin_dtypes = modin_groupby.dtypes
-    with pytest.warns(FutureWarning, match=".*DataFrameGroupBy.dtypes is deprecated.*"):
-        pandas_dtypes = pandas_groupby.dtypes
-    df_equals(modin_dtypes, pandas_dtypes)
+    df_equals(modin_groupby.dtypes, pandas_groupby.dtypes)
 
 
 def eval_prod(modin_groupby, pandas_groupby, numeric_only=False):
@@ -1261,25 +1254,12 @@ def eval_std(modin_groupby, pandas_groupby, numeric_only=False):
     )
 
 
-def eval_agg(modin_groupby, pandas_groupby, func, agg_or_aggregate="agg"):
-    def will_be_replaced(func, obj):
-        return (
-            callable(func)
-            and isinstance(func, BuiltinFunctionType)
-            and func.__name__ in dir(obj)
-        )
+def eval_aggregate(modin_groupby, pandas_groupby, func):
+    df_equals(modin_groupby.aggregate(func), pandas_groupby.aggregate(func))
 
-    with pytest.warns(
-        FutureWarning,
-        match=".*In a future version of pandas, the provided callable will be used directly.*",
-    ) if will_be_replaced(func, modin_groupby) else contextlib.nullcontext():
-        modin_res = getattr(modin_groupby, agg_or_aggregate)(func)
-    with pytest.warns(
-        FutureWarning,
-        match=".*In a future version of pandas, the provided callable will be used directly.*",
-    ) if will_be_replaced(func, pandas_groupby) else contextlib.nullcontext():
-        pandas_res = getattr(modin_groupby, agg_or_aggregate)(func)
-    df_equals(modin_res, pandas_res)
+
+def eval_agg(modin_groupby, pandas_groupby, func):
+    df_equals(modin_groupby.agg(func), pandas_groupby.agg(func))
 
 
 def eval_rank(modin_groupby, pandas_groupby):
@@ -1349,18 +1329,12 @@ def eval_transform(modin_groupby, pandas_groupby, func):
     )
 
 
-def eval_fillna(modin_groupby, pandas_groupby, catch_warns=True):
-    with pytest.warns(
-        FutureWarning,
-        match=r".*(DataFrameGroupBy|SeriesGroupBy|Series).fillna with 'method' is deprecated.*",
-    ) if catch_warns else contextlib.nullcontext():
-        modin_res = modin_groupby.fillna(method="ffill")
-    with pytest.warns(
-        FutureWarning,
-        match=".*(DataFrameGroupBy|SeriesGroupBy|Series).fillna with 'method' is deprecated.*",
-    ) if catch_warns else contextlib.nullcontext():
-        pandas_res = pandas_groupby.fillna(method="ffill")
-    df_equals(*sort_index_if_experimental_groupby(modin_res, pandas_res))
+def eval_fillna(modin_groupby, pandas_groupby):
+    df_equals(
+        *sort_index_if_experimental_groupby(
+            modin_groupby.fillna(method="ffill"), pandas_groupby.fillna(method="ffill")
+        )
+    )
 
 
 def eval_count(modin_groupby, pandas_groupby):
@@ -1465,7 +1439,7 @@ def eval_groups(modin_groupby, pandas_groupby):
         df_equals(modin_groupby.get_group(name), pandas_groupby.get_group(name))
 
 
-def eval_shift(modin_groupby, pandas_groupby, catch_warns=True):
+def eval_shift(modin_groupby, pandas_groupby):
     def comparator(df1, df2):
         df_equals(*sort_index_if_experimental_groupby(df1, df2))
 
@@ -1494,16 +1468,8 @@ def comparator(df1, df2):
     # https://github.com/modin-project/modin/issues/3701
     if get_current_execution() != "BaseOnPython":
         if isinstance(pandas_groupby, pandas.core.groupby.DataFrameGroupBy):
-            with pytest.warns(
-                FutureWarning,
-                match=".*DataFrameGroupBy.shift with axis=1 is deprecated.*",
-            ) if catch_warns else contextlib.nullcontext():
-                pandas_res = pandas_groupby.shift(axis=1, fill_value=777)
-            with pytest.warns(
-                FutureWarning,
-                match=".*DataFrameGroupBy.shift with axis=1 is deprecated.*",
-            ) if catch_warns else contextlib.nullcontext():
-                modin_res = modin_groupby.shift(axis=1, fill_value=777)
+            pandas_res = pandas_groupby.shift(axis=1, fill_value=777)
+            modin_res = modin_groupby.shift(axis=1, fill_value=777)
             # Pandas produces unexpected index order (pandas GH 44269).
             # Here we align index of Modin result with pandas to make test passed.
             import pandas.core.algorithms as algorithms
@@ -1704,17 +1670,10 @@ def test_shift_freq(groupby_axis, shift_axis, groupby_sort):
         pandas_groupby = pandas_df.groupby(by=_by, axis=groupby_axis, sort=groupby_sort)
         modin_groupby = modin_df.groupby(by=_by, axis=groupby_axis, sort=groupby_sort)
 
-        def _callable(groupby):
-            with pytest.warns(
-                FutureWarning,
-                match=".*DataFrameGroupBy.shift with axis=1 is deprecated.*",
-            ) if shift_axis == 1 else contextlib.nullcontext():
-                return groupby.shift(axis=shift_axis, freq="S")
-
         eval_general(
             modin_groupby,
             pandas_groupby,
-            _callable,
+            lambda groupby: groupby.shift(axis=shift_axis, freq="S"),
         )
 
 
@@ -1849,9 +1808,7 @@ def col3(x):
     [
         "quantile",
         "mean",
-        pytest.param(
-            "sum", marks=pytest.mark.skip("See Modin issue #2255 for details")
-        ),
+        "sum",
         "median",
         "unique",
         "cumprod",
@@ -2588,7 +2545,7 @@ def run_test(eval_function, *args, **kwargs):
     run_test(eval___getattr__, item="b")
     run_test(eval___getitem__, item="b")
     run_test(eval_agg, func=lambda df: df.mean())
-    run_test(eval_agg, func=lambda df: df.mean(), agg_or_aggregate="aggregate")
+    run_test(eval_aggregate, func=lambda df: df.mean())
     run_test(eval_any)
     run_test(eval_apply, func=lambda df: df.mean())
     run_test(eval_count)
@@ -2597,14 +2554,7 @@ def run_test(eval_function, *args, **kwargs):
     run_test(eval_cumprod, numeric_only=True)
     run_test(eval_cumsum, numeric_only=True)
     run_test(eval_dtypes)
-    # FIXME: inconsistent behavior when showing warnings
-    with warnings.catch_warnings():
-        warnings.filterwarnings(
-            "ignore",
-            "DataFrameGroupBy.fillna with 'method' is deprecated",
-            category=FutureWarning,
-        )
-        run_test(eval_fillna, catch_warns=False)
+    run_test(eval_fillna)
     run_test(eval_groups)
     run_test(eval_len)
     run_test(eval_max)
@@ -2630,14 +2580,7 @@ def run_test(eval_function, *args, **kwargs):
         # https://github.com/modin-project/modin/issues/5505
         # https://github.com/modin-project/modin/issues/5506
         run_test(eval_pipe, func=lambda df: df.mean())
-        # FIXME: inconsistent behavior when showing warnings
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore",
-                "DataFrameGroupBy.shift with axis=1 is deprecated",
-                category=FutureWarning,
-            )
-            run_test(eval_shift, catch_warns=False)
+        run_test(eval_shift)
 
     # TODO: these functions fail in case of empty data in the pandas itself,
     # we have to modify the `eval_*` functions to be able to check for
@@ -2996,3 +2939,102 @@ def test_reshuffling_groupby_on_strings(modify_config):
     eval_general(
         modin_df.groupby("col1"), pandas_df.groupby("col1"), lambda grp: grp.mean()
     )
+
+
+### TEST GROUPBY WARNINGS ###
+
+
+def test_groupby_axis_1_warning():
+    """Both modin and pandas must warn on ``DataFrame.groupby(axis=1)``."""
+    expected_msg = "DataFrame.groupby with axis=1 is deprecated"
+    modin_df, pandas_df = create_test_dfs(
+        {
+            "col1": [0, 3, 2, 3],
+            "col2": [4, 1, 6, 7],
+        }
+    )
+
+    # Each frame gets its own ``pytest.warns`` scope, modin first, so a
+    # warning raised for one frame cannot satisfy the check for the other.
+    for frame in (modin_df, pandas_df):
+        with pytest.warns(FutureWarning, match=expected_msg):
+            frame.groupby(by="col1", axis=1)
+
+
+def test_groupby_dtypes_warning():
+    """Both modin and pandas must warn when ``DataFrameGroupBy.dtypes`` is read."""
+    expected_msg = "DataFrameGroupBy.dtypes is deprecated"
+    modin_df, pandas_df = create_test_dfs(
+        {"col1": [0, 3, 2, 3], "col2": [4, 1, 6, 7]}
+    )
+    # Both groupby objects are created before either ``dtypes`` access.
+    groupbys = (modin_df.groupby(by="col1"), pandas_df.groupby(by="col1"))
+
+    for grp in groupbys:
+        with pytest.warns(FutureWarning, match=expected_msg):
+            # Reading the attribute is enough; its value is not inspected.
+            grp.dtypes
+
+
+def test_groupby_shift_axis_1_warning():
+    """Both pandas and modin must warn on ``DataFrameGroupBy.shift(axis=1)``.
+
+    The shift is called with ``fill_value=777``; only the warning is checked.
+    """
+    expected_msg = "DataFrameGroupBy.shift with axis=1 is deprecated"
+    modin_df, pandas_df = create_test_dfs(
+        {
+            "col1": [0, 3, 2, 3],
+            "col2": [4, 1, 6, 7],
+        }
+    )
+    modin_groupby = modin_df.groupby(by="col1")
+    pandas_groupby = pandas_df.groupby(by="col1")
+
+    # pandas is exercised first here, then modin.
+    for grp in (pandas_groupby, modin_groupby):
+        with pytest.warns(FutureWarning, match=expected_msg):
+            grp.shift(axis=1, fill_value=777)
+
+
+def test_groupby_fillna_axis_1_warning():
+    """Both modin and pandas must warn on ``DataFrameGroupBy.fillna(method=...)``.
+
+    Despite ``axis_1`` in the name, no ``axis`` argument is passed here.
+    """
+    expected_msg = "DataFrameGroupBy.fillna with 'method' is deprecated"
+    modin_df, pandas_df = create_test_dfs(
+        {
+            "col1": [0, 3, 2, 3],
+            "col2": [4, None, 6, None],
+        }
+    )
+    modin_groupby = modin_df.groupby(by="col1")
+    pandas_groupby = pandas_df.groupby(by="col1")
+
+    # modin is exercised first, then pandas.
+    for grp in (modin_groupby, pandas_groupby):
+        with pytest.warns(FutureWarning, match=expected_msg):
+            grp.fillna(method="ffill")
+
+
+def test_groupby_agg_provided_callable_warning():
+    """Both modin and pandas must warn when ``agg`` gets builtin ``sum``/``max``."""
+    expected_msg = (
+        "In a future version of pandas, the provided callable will be used directly"
+    )
+    modin_df, pandas_df = create_test_dfs(
+        {
+            "col1": [0, 3, 2, 3],
+            "col2": [4, 1, 6, 7],
+        }
+    )
+    modin_groupby = modin_df.groupby(by="col1")
+    pandas_groupby = pandas_df.groupby(by="col1")
+
+    # For every builtin, modin is checked before pandas, and each call has
+    # its own ``pytest.warns`` scope.
+    for func in (sum, max):
+        for grp in (modin_groupby, pandas_groupby):
+            with pytest.warns(FutureWarning, match=expected_msg):
+                grp.agg(func)