From 3106e4929c033a7bc95acd404bf1bf6ab4f1ae86 Mon Sep 17 00:00:00 2001
From: Dmitry Chigarev <62142979+dchigarev@users.noreply.github.com>
Date: Tue, 12 Jan 2021 14:23:53 +0300
Subject: [PATCH] FIX-#2543: fixed handling 'as_index' at groupby dictionary
 renaming aggregation (#2592)

Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
---
 modin/backends/pandas/query_compiler.py |  7 ++-
 modin/pandas/groupby.py                 | 14 ++++++
 modin/pandas/test/test_groupby.py       | 62 +++++++++++++++++++++----
 3 files changed, 74 insertions(+), 9 deletions(-)

diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py
index bce56da3901..66f3fc3cd1b 100644
--- a/modin/backends/pandas/query_compiler.py
+++ b/modin/backends/pandas/query_compiler.py
@@ -2483,7 +2483,12 @@ def _groupby_dict_reduce(
                     raise TypeError
 
                 map_fns.append((new_col_name, groupby_reduce_functions[func][0]))
-                reduce_dict[(col, new_col_name)] = groupby_reduce_functions[func][1]
+                reduced_col_name = (
+                    (*col, new_col_name)
+                    if isinstance(col, tuple)
+                    else (col, new_col_name)
+                )
+                reduce_dict[reduced_col_name] = groupby_reduce_functions[func][1]
             map_dict[col] = map_fns
         return GroupbyReduceFunction.register(map_dict, reduce_dict)(
             query_compiler=self,
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 2abeaa375b8..fdb08222ec1 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -425,6 +425,20 @@ def try_get_str_func(fn):
         )
 
         if relabeling_required:
+            if not self._as_index:
+                nby_cols = len(result.columns) - len(new_columns)
+                order = np.concatenate([np.arange(nby_cols), order + nby_cols])
+                by_cols = result.columns[:nby_cols]
+                new_columns = pandas.Index(new_columns)
+                if by_cols.nlevels != new_columns.nlevels:
+                    by_cols = by_cols.remove_unused_levels()
+                    empty_levels = [
+                        i
+                        for i, level in enumerate(by_cols.levels)
+                        if len(level) == 1 and level[0] == ""
+                    ]
+                    by_cols = by_cols.droplevel(empty_levels)
+                new_columns = by_cols.append(new_columns)
             result = result.iloc[:, order]
             result.columns = new_columns
         return result
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index b116ffadf69..ab500c24a09 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -25,6 +25,7 @@
     test_data,
     test_data_values,
     modin_df_almost_equals_pandas,
+    generate_multiindex,
 )
 
 pd.DEFAULT_NPARTITIONS = 4
@@ -1247,18 +1248,25 @@ def test_shift_freq(groupby_axis, shift_axis):
             ],
             "agg_dict": {
                 "max": (list(test_data["int_data"].keys())[1], max),
-                "min": (list(test_data["int_data"].keys())[-1], min),
+                "min": (list(test_data["int_data"].keys())[-2], min),
             },
         },
+        pytest.param(
+            {
+                "by": [
+                    list(test_data["int_data"].keys())[0],
+                    list(test_data["int_data"].keys())[-1],
+                ],
+                "agg_dict": {
+                    "max": (list(test_data["int_data"].keys())[1], max),
+                    "min": (list(test_data["int_data"].keys())[-1], min),
+                },
+            },
+            marks=pytest.mark.skip("See Modin issue #2542"),
+        ),
     ],
 )
-@pytest.mark.parametrize(
-    "as_index",
-    [
-        True,
-        pytest.param(False, marks=pytest.mark.xfail(reason="See modin issue #2543")),
-    ],
-)
+@pytest.mark.parametrize("as_index", [True, False])
 def test_agg_func_None_rename(by_and_agg_dict, as_index):
     modin_df, pandas_df = create_test_dfs(test_data["int_data"])
 
@@ -1271,6 +1279,44 @@ def test_agg_func_None_rename(by_and_agg_dict, as_index):
     df_equals(modin_result, pandas_result)
 
 
+@pytest.mark.parametrize(
+    "as_index",
+    [
+        True,
+        pytest.param(
+            False,
+            marks=pytest.mark.xfail_backends(
+                ["BaseOnPython"], reason="See Pandas issue #39103"
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize("by_length", [1, 3])
+@pytest.mark.parametrize(
+    "agg_fns",
+    [["sum", "min", "max"], ["mean", "quantile"]],
+    ids=["reduction", "aggregation"],
+)
+def test_dict_agg_rename_mi_columns(as_index, by_length, agg_fns):
+    """Check renaming aggregation on MultiIndex columns against pandas."""
+    md_df, pd_df = create_test_dfs(test_data["int_data"])
+    mi_columns = generate_multiindex(len(md_df.columns), nlevels=4)
+    md_df.columns, pd_df.columns = mi_columns, mi_columns
+    # The first `by_length` columns are group keys; the next three are aggregated.
+    by = list(md_df.columns[:by_length])
+    agg_cols = list(md_df.columns[by_length : by_length + 3])
+
+    agg_dict = {
+        f"custom-{i}" + str(agg_fns[i % len(agg_fns)]): (col, agg_fns[i % len(agg_fns)])
+        for i, col in enumerate(agg_cols)
+    }
+    # The reference must come from the pandas frame, not from Modin itself.
+    md_res = md_df.groupby(by, as_index=as_index).agg(**agg_dict)
+    pd_res = pd_df.groupby(by, as_index=as_index).agg(**agg_dict)
+
+    df_equals(md_res, pd_res)
+
+
 @pytest.mark.parametrize(
     "operation",
     [