Skip to content

Commit

Permalink
FIX-#7381: Fix Series binary operators ignoring fill_value (#7394)
Browse files Browse the repository at this point in the history
Signed-off-by: Jonathan Shi <[email protected]>
Co-authored-by: Anatoly Myachev <[email protected]>
  • Loading branch information
noloerino and anmyachev authored Sep 17, 2024
1 parent 05f5e7d commit 6cf3ca2
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 18 deletions.
8 changes: 8 additions & 0 deletions modin/core/storage_formats/base/doc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ def doc_binary_method(operation, sign, self_on_right=False, op_type="arithmetic"
fill_value : float or None
Value to fill missing elements during frame alignment.
""",
"series_comparison": """
level : int or label
In case of MultiIndex match index values on the passed level.
fill_value : float or None
Value to fill missing elements during frame alignment.
axis : {{0, 1}}
Unused. Parameter needed for compatibility with DataFrame.
""",
}

verbose_substitution = (
Expand Down
76 changes: 76 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,18 @@ def combine_first(self, other, **kwargs): # noqa: PR02
def eq(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="equality comparison", sign="==", op_type="series_comparison"
)
def series_eq(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.eq)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.add_refer_to("DataFrame.equals")
def equals(self, other): # noqa: PR01, RT01
return BinaryDefault.register(pandas.DataFrame.equals)(self, other=other)
Expand Down Expand Up @@ -685,24 +697,76 @@ def divmod(self, other, **kwargs):
def ge(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="greater than or equal comparison",
sign=">=",
op_type="series_comparison",
)
def series_ge(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.ge)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(
operation="greater than comparison", sign=">", op_type="comparison"
)
def gt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="greater than comparison", sign=">", op_type="series_comparison"
)
def series_gt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.gt)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(
operation="less than or equal comparison", sign="<=", op_type="comparison"
)
def le(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="less than or equal comparison",
sign="<=",
op_type="series_comparison",
)
def series_le(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.le)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(
operation="less than comparison", sign="<", op_type="comparison"
)
def lt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="less than", sign="<", op_type="series_comparison"
)
def series_lt(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.lt)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(operation="modulo", sign="%")
def mod(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.mod)(self, other=other, **kwargs)
Expand Down Expand Up @@ -818,6 +882,18 @@ def dot(self, other, **kwargs): # noqa: PR02
def ne(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs)

@doc_utils.doc_binary_method(
operation="not equal comparison", sign="!=", op_type="series_comparison"
)
def series_ne(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.Series.ne)(
self,
other=other,
squeeze_self=True,
squeeze_other=kwargs.pop("squeeze_other", False),
**kwargs,
)

@doc_utils.doc_binary_method(operation="exponential power", sign="**")
def pow(self, other, **kwargs): # noqa: PR02
return BinaryDefault.register(pandas.DataFrame.pow)(self, other=other, **kwargs)
Expand Down
40 changes: 40 additions & 0 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,26 @@ def caller(df, *args, **kwargs):
return caller


def _series_logical_binop(func):
"""
Build a callable function to pass to Binary.register for Series logical operators.
Parameters
----------
func : callable
Binary operator method of pandas.Series to be applied.
Returns
-------
callable
"""
return lambda x, y, **kwargs: func(
x.squeeze(axis=1),
y.squeeze(axis=1) if kwargs.pop("squeeze_other", False) else y,
**kwargs,
).to_frame()


@_inherit_docstrings(BaseQueryCompiler)
class PandasQueryCompiler(BaseQueryCompiler, QueryCompilerCaster):
"""
Expand Down Expand Up @@ -522,6 +542,26 @@ def to_numpy(self, **kwargs):
sort=False,
)

# Series logical operators take an additional fill_value flag that dataframe does not
series_eq = Binary.register(
_series_logical_binop(pandas.Series.eq), infer_dtypes="bool"
)
series_ge = Binary.register(
_series_logical_binop(pandas.Series.ge), infer_dtypes="bool"
)
series_gt = Binary.register(
_series_logical_binop(pandas.Series.gt), infer_dtypes="bool"
)
series_le = Binary.register(
_series_logical_binop(pandas.Series.le), infer_dtypes="bool"
)
series_lt = Binary.register(
_series_logical_binop(pandas.Series.lt), infer_dtypes="bool"
)
series_ne = Binary.register(
_series_logical_binop(pandas.Series.ne), infer_dtypes="bool"
)

# Needed for numpy API
_logical_and = Binary.register(
lambda df, other, *args, **kwargs: pandas.DataFrame(
Expand Down
11 changes: 11 additions & 0 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,17 @@ def _binary_op(self, op, other, **kwargs) -> Self:
]
if op in exclude_list:
kwargs.pop("axis")
# Series logical operations take an additional fill_value argument that DF does not
series_specialize_list = [
"eq",
"ge",
"gt",
"le",
"lt",
"ne",
]
if not self._is_dataframe and op in series_specialize_list:
op = "series_" + op
new_query_compiler = getattr(self._query_compiler, op)(other, **kwargs)
return self._create_or_update_from_compiler(new_query_compiler)

Expand Down
Loading

0 comments on commit 6cf3ca2

Please sign in to comment.