Skip to content

Commit

Permalink
FEAT-#6498: Make Fold operator more flexible (#7257)
Browse files (browse the repository at this point in the history)
Signed-off-by: Igoshev, Iaroslav <[email protected]>
Co-authored-by: Anatoly Myachev <[email protected]>
  • Loading branch information
YarShev and anmyachev authored May 14, 2024
1 parent aa6e5c2 commit e74ac5d
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 65 deletions.
14 changes: 13 additions & 1 deletion modin/core/dataframe/algebra/fold.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,7 +30,7 @@ class Fold(Operator):

@classmethod
def register(
cls, fold_function: Callable[..., pandas.DataFrame]
cls, fold_function: Callable[..., pandas.DataFrame], shape_preserved=False
) -> Callable[..., PandasQueryCompiler]:
"""
Build Fold operator that will be performed across rows/columns.
Expand All @@ -39,6 +39,9 @@ def register(
----------
fold_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
Function to apply across rows/columns.
shape_preserved : bool, default: False
Whether the shape of the dataframe is preserved or not
after applying a function.
Returns
-------
Expand All @@ -50,6 +53,8 @@ def caller(
query_compiler: PandasQueryCompiler,
fold_axis: Optional[int] = None,
*args: tuple,
new_index=None,
new_columns=None,
**kwargs: dict,
) -> PandasQueryCompiler:
"""
Expand All @@ -64,6 +69,10 @@ def caller(
apply across full row partitions.
*args : tuple
Additional arguments passed to `fold_function`.
new_index : list-like, optional
The index of the result.
new_columns : list-like, optional
The columns of the result.
**kwargs: dict
Additional keyword arguments passed to `fold_function`.
Expand All @@ -77,6 +86,9 @@ def caller(
query_compiler._modin_frame.fold(
cls.validate_axis(fold_axis),
lambda x: fold_function(x, *args, **kwargs),
new_index=new_index,
new_columns=new_columns,
shape_preserved=shape_preserved,
)
)

Expand Down
37 changes: 19 additions & 18 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2321,7 +2321,7 @@ def window(
pass

@lazy_metadata_decorator(apply_axis="both")
def fold(self, axis, func, new_columns=None):
def fold(self, axis, func, new_index=None, new_columns=None, shape_preserved=False):
"""
Perform a function across an entire axis.
Expand All @@ -2331,37 +2331,38 @@ def fold(self, axis, func, new_columns=None):
The axis to apply over.
func : callable
The function to apply.
new_index : list-like, optional
The index of the result.
new_columns : list-like, optional
The columns of the result.
Must be the same length as the columns' length of `self`.
The column labels of `self` may change during an operation so
we may want to pass the new column labels in (e.g., see `cat.codes`).
shape_preserved : bool, default: False
Whether the shape of the dataframe is preserved or not
after applying a function.
Returns
-------
PandasDataframe
A new dataframe.
Notes
-----
The data shape is not changed (length and width of the table).
"""
if new_columns is not None:
if self.has_materialized_columns:
assert len(self.columns) == len(
new_columns
), "The length of `new_columns` doesn't match the columns' length of `self`"
self.set_columns_cache(new_columns)
new_row_lengths = None
new_column_widths = None
if shape_preserved:
if new_index is None:
new_index = self.copy_index_cache(copy_lengths=True)
if new_columns is None:
new_columns = self.copy_columns_cache(copy_lengths=True)
new_row_lengths = self._row_lengths_cache
new_column_widths = self._column_widths_cache

new_partitions = self._partition_mgr_cls.map_axis_partitions(
axis, self._partitions, func, keep_partitioning=True
)
return self.__constructor__(
new_partitions,
self.copy_index_cache(copy_lengths=True),
self.copy_columns_cache(copy_lengths=True),
self._row_lengths_cache,
self._column_widths_cache,
new_index,
new_columns,
row_lengths=new_row_lengths,
column_widths=new_column_widths,
pandas_backend=self._pandas_backend,
)

Expand Down
Loading

0 comments on commit e74ac5d

Please sign in to comment.