Skip to content

Commit

Permalink
FEAT-modin-project#2491: fixes
Browse files (browse the repository at this point in the history)
Signed-off-by: Dmitry Chigarev <[email protected]>
  • Loading branch information
dchigarev committed Dec 17, 2020
1 parent 907662f commit c0d4df7
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 22 deletions.
11 changes: 4 additions & 7 deletions modin/backends/pandas/query_compiler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,7 +23,7 @@
)
from pandas.core.base import DataError
from typing import Type, Callable
from collections.abc import Container
from collections.abc import Iterable
import warnings


Expand Down Expand Up @@ -2462,7 +2462,7 @@ def _groupby_dict_reduce(
map_dict = {}
reduce_dict = {}
rename_columns = any(
not isinstance(fn, str) and isinstance(fn, Container)
not isinstance(fn, str) and isinstance(fn, Iterable)
for fn in agg_func.values()
)
for col, col_funcs in agg_func.items():
Expand All @@ -2475,11 +2475,8 @@ def _groupby_dict_reduce(

map_fns = []
for i, fn in enumerate(col_funcs):
if not isinstance(fn, str) and isinstance(fn, Container):
assert (
len(fn) == 2
), f"Incorrect number of values to unpack. (got {len(fn)} expected 2)"
future_col_name, func = fn[0], fn[1]
if not isinstance(fn, str) and isinstance(fn, Iterable):
future_col_name, func = fn
elif isinstance(fn, str):
future_col_name, func = fn, fn
else:
Expand Down
22 changes: 8 additions & 14 deletions modin/pandas/groupby.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,7 @@
from pandas.core.aggregation import reconstruct_func
import pandas.core.common as com
from types import BuiltinFunctionType
from collections.abc import Iterable

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings, try_cast_to_pandas, wrap_udf_function
Expand Down Expand Up @@ -376,22 +377,15 @@ def aggregate(self, func=None, *args, **kwargs):
relabeling_required = False
if isinstance(func, dict) or func is None:

def _reconstruct_func(func, **kwargs):
relabeling_required, func, new_columns, order = reconstruct_func(
func, **kwargs
)
# We convert to the string version of the function for simplicity.
func = {
k: v
if not callable(v) or v.__name__ not in dir(self)
else v.__name__
for k, v in func.items()
}
return relabeling_required, func, new_columns, order

relabeling_required, func_dict, new_columns, order = _reconstruct_func(
def try_get_str_func(o):
if not isinstance(o, str) and isinstance(o, Iterable):
return [try_get_str_func(v) for v in o]
return o.__name__ if callable(o) and o.__name__ in dir(self) else o

relabeling_required, func_dict, new_columns, order = reconstruct_func(
func, **kwargs
)
func_dict = {k: try_get_str_func(v) for k, v in func_dict.items()}

if any(i not in self._df.columns for i in func_dict.keys()):
from pandas.core.base import SpecificationError
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/test/test_groupby.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -641,6 +641,7 @@ def test_large_row_groupby(is_by_category):
min,
sum,
{"A": "sum"},
{"A": lambda df: df.sum()},
{"A": "max", "B": "sum", "C": "min"},
]
for func in agg_functions:
Expand Down Expand Up @@ -1437,7 +1438,7 @@ def get_columns(df):
),
lambda grp: grp.agg(
{
df_from_grp(grp).columns[0]: (max, min, sum),
df_from_grp(grp).columns[0]: (("new_max", max), min, sum),
df_from_grp(grp).columns[-1]: (sum, min, max),
}
),
Expand Down

0 comments on commit c0d4df7

Please sign in to comment.