FIX-#2543: fixed handling 'as_index' at groupby dictionary renaming aggregation (#2592)

Signed-off-by: Dmitry Chigarev <[email protected]>
dchigarev authored Jan 12, 2021
1 parent 439e17d commit 3106e49
Showing 3 changed files with 74 additions and 9 deletions.
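For context: the bug being fixed is dictionary-renaming (named) aggregation combined with as_index=False, which previously produced a wrong column layout (issue #2543). A minimal sketch of the scenario, with illustrative data not taken from this commit:

    import modin.pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]})

    # Named (dict-renaming) aggregation; with as_index=False the grouping
    # column "a" must stay in front of the renamed result columns.
    res = df.groupby("a", as_index=False).agg(b_max=("b", "max"), c_min=("c", "min"))
    print(res)
    #    a  b_max  c_min
    # 0  1      4      6
    # 1  2      5      8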
7 changes: 6 additions & 1 deletion modin/backends/pandas/query_compiler.py
@@ -2483,7 +2483,12 @@ def _groupby_dict_reduce(
                     raise TypeError

                 map_fns.append((new_col_name, groupby_reduce_functions[func][0]))
-                reduce_dict[(col, new_col_name)] = groupby_reduce_functions[func][1]
+                reduced_col_name = (
+                    (*col, new_col_name)
+                    if isinstance(col, tuple)
+                    else (col, new_col_name)
+                )
+                reduce_dict[reduced_col_name] = groupby_reduce_functions[func][1]
             map_dict[col] = map_fns
         return GroupbyReduceFunction.register(map_dict, reduce_dict)(
             query_compiler=self,
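The query_compiler change above flattens the reduce key when the frame has MultiIndex columns: there `col` is itself a tuple of level values, and nesting it inside another tuple would produce a malformed key. A standalone sketch of that construction (names illustrative):

    def reduced_key(col, new_col_name):
        # For MultiIndex columns `col` is a tuple of level values; splat it so
        # the renamed result is a flat MultiIndex key, not a nested tuple.
        return (*col, new_col_name) if isinstance(col, tuple) else (col, new_col_name)

    assert reduced_key(("lvl-0", "lvl-1"), "max") == ("lvl-0", "lvl-1", "max")
    assert reduced_key("plain_col", "max") == ("plain_col", "max")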
14 changes: 14 additions & 0 deletions modin/pandas/groupby.py
@@ -425,6 +425,20 @@ def try_get_str_func(fn):
         )

         if relabeling_required:
+            if not self._as_index:
+                nby_cols = len(result.columns) - len(new_columns)
+                order = np.concatenate([np.arange(nby_cols), order + nby_cols])
+                by_cols = result.columns[:nby_cols]
+                new_columns = pandas.Index(new_columns)
+                if by_cols.nlevels != new_columns.nlevels:
+                    by_cols = by_cols.remove_unused_levels()
+                    empty_levels = [
+                        i
+                        for i, level in enumerate(by_cols.levels)
+                        if len(level) == 1 and level[0] == ""
+                    ]
+                    by_cols = by_cols.droplevel(empty_levels)
+                new_columns = by_cols.append(new_columns)
             result = result.iloc[:, order]
             result.columns = new_columns
         return result
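The new branch in groupby.py handles as_index=False: the grouping columns land in front of the aggregation result, so the relabel permutation must be shifted past them and the by-column labels prepended (empty unused levels are dropped when the two column indexes disagree in nlevels). A standalone sketch of the shift-and-prepend step, with assumed toy values:

    import numpy as np
    import pandas

    # Assumed toy state: one grouping column followed by two aggregation
    # columns that must be reordered to match the relabeled names.
    result_columns = pandas.Index(["by_col", "agg_b", "agg_a"])
    new_columns = pandas.Index(["agg_a", "agg_b"])  # target relabeled names
    order = np.array([1, 0])                        # permutation over agg columns only

    nby_cols = len(result_columns) - len(new_columns)
    order = np.concatenate([np.arange(nby_cols), order + nby_cols])
    final_columns = result_columns[:nby_cols].append(new_columns)

    print(order)                # [0 2 1] -> by-column stays first, agg columns swap
    print(list(final_columns))  # ['by_col', 'agg_a', 'agg_b']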
62 changes: 54 additions & 8 deletions modin/pandas/test/test_groupby.py
@@ -25,6 +25,7 @@
     test_data,
     test_data_values,
     modin_df_almost_equals_pandas,
+    generate_multiindex,
 )

 pd.DEFAULT_NPARTITIONS = 4
@@ -1247,18 +1248,25 @@ def test_shift_freq(groupby_axis, shift_axis):
             ],
             "agg_dict": {
                 "max": (list(test_data["int_data"].keys())[1], max),
-                "min": (list(test_data["int_data"].keys())[-1], min),
+                "min": (list(test_data["int_data"].keys())[-2], min),
             },
         },
+        pytest.param(
+            {
+                "by": [
+                    list(test_data["int_data"].keys())[0],
+                    list(test_data["int_data"].keys())[-1],
+                ],
+                "agg_dict": {
+                    "max": (list(test_data["int_data"].keys())[1], max),
+                    "min": (list(test_data["int_data"].keys())[-1], min),
+                },
+            },
+            marks=pytest.mark.skip("See Modin issue #2542"),
+        ),
     ],
 )
-@pytest.mark.parametrize(
-    "as_index",
-    [
-        True,
-        pytest.param(False, marks=pytest.mark.xfail(reason="See modin issue #2543")),
-    ],
-)
+@pytest.mark.parametrize("as_index", [True, False])
 def test_agg_func_None_rename(by_and_agg_dict, as_index):
     modin_df, pandas_df = create_test_dfs(test_data["int_data"])

@@ -1271,6 +1279,44 @@ def test_agg_func_None_rename(by_and_agg_dict, as_index):
     df_equals(modin_result, pandas_result)


+@pytest.mark.parametrize(
+    "as_index",
+    [
+        True,
+        pytest.param(
+            False,
+            marks=pytest.mark.xfail_backends(
+                ["BaseOnPython"], reason="See Pandas issue #39103"
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize("by_length", [1, 3])
+@pytest.mark.parametrize(
+    "agg_fns",
+    [["sum", "min", "max"], ["mean", "quantile"]],
+    ids=["reduction", "aggregation"],
+)
+def test_dict_agg_rename_mi_columns(as_index, by_length, agg_fns):
+    md_df, pd_df = create_test_dfs(test_data["int_data"])
+    mi_columns = generate_multiindex(len(md_df.columns), nlevels=4)
+
+    md_df.columns, pd_df.columns = mi_columns, mi_columns
+
+    by = list(md_df.columns[:by_length])
+    agg_cols = list(md_df.columns[by_length : by_length + 3])
+
+    agg_dict = {
+        f"custom-{i}" + str(agg_fns[i % len(agg_fns)]): (col, agg_fns[i % len(agg_fns)])
+        for i, col in enumerate(agg_cols)
+    }
+
+    md_res = md_df.groupby(by, as_index=as_index).agg(**agg_dict)
+    pd_res = pd_df.groupby(by, as_index=as_index).agg(**agg_dict)
+
+    df_equals(md_res, pd_res)
+
+
 @pytest.mark.parametrize(
     "operation",
     [
