From abfd048c8a62a00b09c0ff9da24899eaaaa62a20 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 13:39:43 -0700 Subject: [PATCH 1/9] FIX-#7381: Fix Series binary operators ignoring fillna Signed-off-by: Jonathan Shi --- .../storage_formats/base/query_compiler.py | 54 +++++++++++++++++++ .../storage_formats/pandas/query_compiler.py | 15 ++++++ modin/pandas/base.py | 11 ++++ modin/pandas/series.py | 36 ++++++------- modin/tests/pandas/test_series.py | 33 ++++++++++++ 5 files changed, 131 insertions(+), 18 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 343008d2a3d..112616bb27d 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -647,6 +647,15 @@ def combine_first(self, other, **kwargs): # noqa: PR02 def eq(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs) + def series_eq(self, other, **kwargs): + return BinaryDefault.register(pandas.Series.eq)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + @doc_utils.add_refer_to("DataFrame.equals") def equals(self, other): # noqa: PR01, RT01 return BinaryDefault.register(pandas.DataFrame.equals)(self, other=other) @@ -685,24 +694,60 @@ def divmod(self, other, **kwargs): def ge(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs) + def series_ge(self, other, **kwargs): + return BinaryDefault.register(pandas.Series.ge)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + @doc_utils.doc_binary_method( operation="greater than comparison", sign=">", op_type="comparison" ) def gt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs) + def series_gt(self, other, **kwargs): + return BinaryDefault.register(pandas.Series.gt)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + @doc_utils.doc_binary_method( operation="less than or equal comparison", sign="<=", op_type="comparison" ) def le(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs) + def series_le(self, other, **kwargs): + return BinaryDefault.register(pandas.Series.le)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + @doc_utils.doc_binary_method( operation="less than comparison", sign="<", op_type="comparison" ) def lt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs) + def series_lt(self, other, **kwargs): + return BinaryDefault.register(pandas.Series.lt)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + @doc_utils.doc_binary_method(operation="modulo", sign="%") def mod(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.mod)(self, other=other, **kwargs) @@ -818,6 +863,15 @@ def dot(self, other, **kwargs): # noqa: PR02 def ne(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs) + def series_ne(self, other, **kwargs): + return BinaryDefault.register(pandas.Series.ne)( + self, + other=other, + squeeze_self=True, + squeeze_other=True, + **kwargs, + ) + @doc_utils.doc_binary_method(operation="exponential power", sign="**") def pow(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.pow)(self, other=other, **kwargs) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index c7fb0bae21b..e25ce373e1b 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -253,6 +253,13 @@ def caller(df, *args, **kwargs): return caller +def _series_logical_binop(func): + """ + Build a callable function to pass to Binary.register for Series logical operators. + """ + return lambda x, y, **kwargs: func(x.squeeze(axis=1), y.squeeze(axis=1), **kwargs).to_frame() + + @_inherit_docstrings(BaseQueryCompiler) class PandasQueryCompiler(BaseQueryCompiler, QueryCompilerCaster): """ @@ -522,6 +529,14 @@ def to_numpy(self, **kwargs): sort=False, ) + # Series logical operators take an additional fill_value flag that dataframe does not + series_eq = Binary.register(_series_logical_binop(pandas.Series.eq), infer_dtypes="bool") + series_ge = Binary.register(_series_logical_binop(pandas.Series.ge), infer_dtypes="bool") + series_gt = Binary.register(_series_logical_binop(pandas.Series.gt), infer_dtypes="bool") + series_le = Binary.register(_series_logical_binop(pandas.Series.le), infer_dtypes="bool") + series_lt = Binary.register(_series_logical_binop(pandas.Series.lt), infer_dtypes="bool") + series_ne = Binary.register(_series_logical_binop(pandas.Series.ne), infer_dtypes="bool") + # Needed for numpy API _logical_and = Binary.register( lambda df, other, *args, **kwargs: pandas.DataFrame( diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 04dd845915c..1b893743282 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -509,6 +509,17 @@ def _binary_op(self, op, other, **kwargs) -> Self: ] if op in exclude_list: kwargs.pop("axis") + # Series logical operations take an additional fill_value argument that DF does not + series_specialize_list = [ + "eq", + "ge", + "gt", + "le", + "lt", + "ne", + ] + if not self._is_dataframe and op in series_specialize_list: + op = "series_" + op new_query_compiler = getattr(self._query_compiler, op)(other, **kwargs) return self._create_or_update_from_compiler(new_query_compiler) diff --git a/modin/pandas/series.py b/modin/pandas/series.py index d18a0bec778..e26e70d4370 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1031,7 +1031,7 @@ def eq( Return Equal to of series and `other`, element-wise (binary operator `eq`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).eq(new_other, level=level, axis=axis) + return new_self._binary_op("eq", new_other, level=level, fill_value=fill_value, axis=axis) def equals(self, other) -> bool: # noqa: PR01, RT01, D200 """ @@ -1130,7 +1130,7 @@ def floordiv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).floordiv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def ge( @@ -1140,7 +1140,7 @@ def ge( Return greater than or equal to of series and `other`, element-wise (binary operator `ge`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).ge(new_other, level=level, axis=axis) + return new_self._binary_op("ge", new_other, level=level, fill_value=fill_value, axis=axis) def groupby( self, @@ -1188,7 +1188,7 @@ def gt( Return greater than of series and `other`, element-wise (binary operator `gt`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).gt(new_other, level=level, axis=axis) + return new_self._binary_op("gt", new_other, level=level, fill_value=fill_value, axis=axis) def hist( self, @@ -1306,7 +1306,7 @@ def le( Return less than or equal to of series and `other`, element-wise (binary operator `le`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).le(new_other, level=level, axis=axis) + return new_self._binary_op("le", new_other, level=level, fill_value=fill_value, axis=axis) def lt( self, other, level=None, fill_value=None, axis=0 @@ -1315,7 +1315,7 @@ def lt( Return less than of series and `other`, element-wise (binary operator `lt`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).lt(new_other, level=level, axis=axis) + return new_self._binary_op("lt", new_other, level=level, fill_value=fill_value, axis=axis) def map(self, arg, na_action=None) -> Series: # noqa: PR01, RT01, D200 """ @@ -1402,7 +1402,7 @@ def mod( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).mod( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def mode(self, dropna=True) -> Series: # noqa: PR01, RT01, D200 @@ -1419,7 +1419,7 @@ def mul( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).mul( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) multiply = mul @@ -1432,7 +1432,7 @@ def rmul( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rmul( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def ne( @@ -1442,7 +1442,7 @@ def ne( Return not equal to of series and `other`, element-wise (binary operator `ne`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).ne(new_other, level=level, axis=axis) + return new_self._binary_op("ne", new_other, level=level, fill_value=fill_value, axis=axis) def nlargest(self, n=5, keep="first") -> Series: # noqa: PR01, RT01, D200 """ @@ -1562,7 +1562,7 @@ def pow( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).pow( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) @_inherit_docstrings(pandas.Series.prod, apilink="pandas.Series.prod") @@ -1763,7 +1763,7 @@ def rfloordiv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rfloordiv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rmod( @@ -1774,7 +1774,7 @@ def rmod( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rmod( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rpow( @@ -1785,7 +1785,7 @@ def rpow( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rpow( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rsub( @@ -1796,7 +1796,7 @@ def rsub( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rsub( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rtruediv( @@ -1807,7 +1807,7 @@ def rtruediv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rtruediv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) rdiv = rtruediv @@ -1955,7 +1955,7 @@ def sub( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).sub( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) subtract = sub @@ -2130,7 +2130,7 @@ def truediv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).truediv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) div = divide = truediv diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py index 9dd8b98aac3..5b8174a82d8 100644 --- a/modin/tests/pandas/test_series.py +++ b/modin/tests/pandas/test_series.py @@ -5094,3 +5094,36 @@ def test__reduce__(): .rename("league") ) df_equals(result_md, result_pd) + + +@pytest.mark.parametrize("op", [ + "add", + "radd", + "divmod", + "eq", + "floordiv", + "ge", + "gt", + "le", + "lt", + "mod", + "mul", + "rmul", + "ne", + "pow", + "rdivmod", + "rfloordiv", + "rmod", + "rpow", + "rsub", + "rtruediv", + "sub", + "truediv", +]) +def test_binary_with_fill_value_issue_7381(op): + # Ensures that series binary operations respect the fill_value flag + series_md, series_pd = create_test_series([0, 1, 2, 3]) + rhs_md, rhs_pd = create_test_series([0]) + result_md = getattr(series_md, op)(rhs_md, fill_value=2) + result_pd = getattr(series_pd, op)(rhs_pd, fill_value=2) + df_equals(result_md, result_pd) From 763e09a6eccc729a477ae4c6fe7c74c7d0563e0a Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 13:42:55 -0700 Subject: [PATCH 2/9] lint --- .../storage_formats/pandas/query_compiler.py | 28 +++++++--- modin/pandas/series.py | 24 ++++++--- modin/tests/pandas/test_series.py | 53 ++++++++++--------- 3 files changed, 67 insertions(+), 38 deletions(-) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index e25ce373e1b..385b3f840a4 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -257,7 +257,9 @@ def _series_logical_binop(func): """ Build a callable function to pass to Binary.register for Series logical operators. """ - return lambda x, y, **kwargs: func(x.squeeze(axis=1), y.squeeze(axis=1), **kwargs).to_frame() + return lambda x, y, **kwargs: func( + x.squeeze(axis=1), y.squeeze(axis=1), **kwargs + ).to_frame() @_inherit_docstrings(BaseQueryCompiler) @@ -530,12 +532,24 @@ def to_numpy(self, **kwargs): ) # Series logical operators take an additional fill_value flag that dataframe does not - series_eq = Binary.register(_series_logical_binop(pandas.Series.eq), infer_dtypes="bool") - series_ge = Binary.register(_series_logical_binop(pandas.Series.ge), infer_dtypes="bool") - series_gt = Binary.register(_series_logical_binop(pandas.Series.gt), infer_dtypes="bool") - series_le = Binary.register(_series_logical_binop(pandas.Series.le), infer_dtypes="bool") - series_lt = Binary.register(_series_logical_binop(pandas.Series.lt), infer_dtypes="bool") - series_ne = Binary.register(_series_logical_binop(pandas.Series.ne), infer_dtypes="bool") + series_eq = Binary.register( + _series_logical_binop(pandas.Series.eq), infer_dtypes="bool" + ) + series_ge = Binary.register( + _series_logical_binop(pandas.Series.ge), infer_dtypes="bool" + ) + series_gt = Binary.register( + _series_logical_binop(pandas.Series.gt), infer_dtypes="bool" + ) + series_le = Binary.register( + _series_logical_binop(pandas.Series.le), infer_dtypes="bool" + ) + series_lt = Binary.register( + _series_logical_binop(pandas.Series.lt), infer_dtypes="bool" + ) + series_ne = Binary.register( + _series_logical_binop(pandas.Series.ne), infer_dtypes="bool" + ) # Needed for numpy API _logical_and = Binary.register( diff --git a/modin/pandas/series.py b/modin/pandas/series.py index e26e70d4370..fc17feca4c8 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1031,7 +1031,9 @@ def eq( Return Equal to of series and `other`, element-wise (binary operator `eq`). """ new_self, new_other = self._prepare_inter_op(other) - return new_self._binary_op("eq", new_other, level=level, fill_value=fill_value, axis=axis) + return new_self._binary_op( + "eq", new_other, level=level, fill_value=fill_value, axis=axis + ) def equals(self, other) -> bool: # noqa: PR01, RT01, D200 """ @@ -1140,7 +1142,9 @@ def ge( Return greater than or equal to of series and `other`, element-wise (binary operator `ge`). """ new_self, new_other = self._prepare_inter_op(other) - return new_self._binary_op("ge", new_other, level=level, fill_value=fill_value, axis=axis) + return new_self._binary_op( + "ge", new_other, level=level, fill_value=fill_value, axis=axis + ) def groupby( self, @@ -1188,7 +1192,9 @@ def gt( Return greater than of series and `other`, element-wise (binary operator `gt`). """ new_self, new_other = self._prepare_inter_op(other) - return new_self._binary_op("gt", new_other, level=level, fill_value=fill_value, axis=axis) + return new_self._binary_op( + "gt", new_other, level=level, fill_value=fill_value, axis=axis + ) def hist( self, @@ -1306,7 +1312,9 @@ def le( Return less than or equal to of series and `other`, element-wise (binary operator `le`). """ new_self, new_other = self._prepare_inter_op(other) - return new_self._binary_op("le", new_other, level=level, fill_value=fill_value, axis=axis) + return new_self._binary_op( + "le", new_other, level=level, fill_value=fill_value, axis=axis + ) def lt( self, other, level=None, fill_value=None, axis=0 @@ -1315,7 +1323,9 @@ def lt( Return less than of series and `other`, element-wise (binary operator `lt`). """ new_self, new_other = self._prepare_inter_op(other) - return new_self._binary_op("lt", new_other, level=level, fill_value=fill_value, axis=axis) + return new_self._binary_op( + "lt", new_other, level=level, fill_value=fill_value, axis=axis + ) def map(self, arg, na_action=None) -> Series: # noqa: PR01, RT01, D200 """ @@ -1442,7 +1452,9 @@ def ne( Return not equal to of series and `other`, element-wise (binary operator `ne`). """ new_self, new_other = self._prepare_inter_op(other) - return new_self._binary_op("ne", new_other, level=level, fill_value=fill_value, axis=axis) + return new_self._binary_op( + "ne", new_other, level=level, fill_value=fill_value, axis=axis + ) def nlargest(self, n=5, keep="first") -> Series: # noqa: PR01, RT01, D200 """ diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py index 5b8174a82d8..98a82d8c1ae 100644 --- a/modin/tests/pandas/test_series.py +++ b/modin/tests/pandas/test_series.py @@ -5096,34 +5096,37 @@ def test__reduce__(): df_equals(result_md, result_pd) -@pytest.mark.parametrize("op", [ - "add", - "radd", - "divmod", - "eq", - "floordiv", - "ge", - "gt", - "le", - "lt", - "mod", - "mul", - "rmul", - "ne", - "pow", - "rdivmod", - "rfloordiv", - "rmod", - "rpow", - "rsub", - "rtruediv", - "sub", - "truediv", -]) +@pytest.mark.parametrize( + "op", + [ + "add", + "radd", + "divmod", + "eq", + "floordiv", + "ge", + "gt", + "le", + "lt", + "mod", + "mul", + "rmul", + "ne", + "pow", + "rdivmod", + "rfloordiv", + "rmod", + "rpow", + "rsub", + "rtruediv", + "sub", + "truediv", + ], +) def test_binary_with_fill_value_issue_7381(op): # Ensures that series binary operations respect the fill_value flag series_md, series_pd = create_test_series([0, 1, 2, 3]) rhs_md, rhs_pd = create_test_series([0]) result_md = getattr(series_md, op)(rhs_md, fill_value=2) result_pd = getattr(series_pd, op)(rhs_pd, fill_value=2) - df_equals(result_md, result_pd) + df_equals(result_md, result_pd) From 90fb128343c91d100a2f3fb742b5115582fe5917 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 13:54:58 -0700 Subject: [PATCH 3/9] fix list RHS --- modin/core/storage_formats/pandas/query_compiler.py | 4 +++- modin/tests/pandas/test_series.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 385b3f840a4..a8dd2115a6e 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -258,7 +258,9 @@ def _series_logical_binop(func): Build a callable function to pass to Binary.register for Series logical operators. """ return lambda x, y, **kwargs: func( - x.squeeze(axis=1), y.squeeze(axis=1), **kwargs + x.squeeze(axis=1), + y.squeeze(axis=1) if isinstance(y, pandas.DataFrame) else y, + **kwargs, ).to_frame() diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py index 98a82d8c1ae..145307a681d 100644 --- a/modin/tests/pandas/test_series.py +++ b/modin/tests/pandas/test_series.py @@ -5130,3 +5130,12 @@ def test_binary_with_fill_value_issue_7381(op): result_md = getattr(series_md, op)(rhs_md, fill_value=2) result_pd = getattr(series_pd, op)(rhs_pd, fill_value=2) df_equals(result_md, result_pd) + + +@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"]) +def test_logical_binary_with_list(op): + series_md, series_pd = create_test_series([0, 1, 2]) + rhs = [2, 1, 0] + result_md = getattr(series_md, op)(rhs) + result_pd = getattr(series_pd, op)(rhs) + df_equals(result_md, result_pd) From 058433a02b27d5a23b45d8fbd7c3edbe34e8d656 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 14:10:12 -0700 Subject: [PATCH 4/9] fix lint + default execution --- modin/core/storage_formats/base/query_compiler.py | 12 ++++++------ modin/core/storage_formats/pandas/query_compiler.py | 9 +++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 112616bb27d..f85520143a7 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -652,7 +652,7 @@ def series_eq(self, other, **kwargs): self, other=other, squeeze_self=True, - squeeze_other=True, + squeeze_other=isinstance(other, pandas.DataFrame), **kwargs, ) @@ -699,7 +699,7 @@ def series_ge(self, other, **kwargs): self, other=other, squeeze_self=True, - squeeze_other=True, + squeeze_other=isinstance(other, pandas.DataFrame), **kwargs, ) @@ -714,7 +714,7 @@ def series_gt(self, other, **kwargs): self, other=other, squeeze_self=True, - squeeze_other=True, + squeeze_other=isinstance(other, pandas.DataFrame), **kwargs, ) @@ -729,7 +729,7 @@ def series_le(self, other, **kwargs): self, other=other, squeeze_self=True, - squeeze_other=True, + squeeze_other=isinstance(other, pandas.DataFrame), **kwargs, ) @@ -744,7 +744,7 @@ def series_lt(self, other, **kwargs): self, other=other, squeeze_self=True, - squeeze_other=True, + squeeze_other=isinstance(other, pandas.DataFrame), **kwargs, ) @@ -868,7 +868,7 @@ def series_ne(self, other, **kwargs): self, other=other, squeeze_self=True, - squeeze_other=True, + squeeze_other=isinstance(other, pandas.DataFrame), **kwargs, ) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index a8dd2115a6e..602058f9690 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -256,6 +256,15 @@ def caller(df, *args, **kwargs): def _series_logical_binop(func): """ Build a callable function to pass to Binary.register for Series logical operators. + + Parameters + ---------- + func : callable + Binary operator method of pandas.Series to be applied. + + Returns + ------- + callable """ return lambda x, y, **kwargs: func( x.squeeze(axis=1), From a287f5f99c30c6cdaa45af99b3bc4b838b64b492 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 14:18:52 -0700 Subject: [PATCH 5/9] fix missing docstrings --- modin/core/storage_formats/base/query_compiler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index f85520143a7..5566a4570d8 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -647,6 +647,7 @@ def combine_first(self, other, **kwargs): # noqa: PR02 def eq(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs) + @doc_utils.add_refer_to("Series.eq") def series_eq(self, other, **kwargs): return BinaryDefault.register(pandas.Series.eq)( self, @@ -694,6 +695,7 @@ def divmod(self, other, **kwargs): def ge(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs) + @doc_utils.add_refer_to("Series.ge") def series_ge(self, other, **kwargs): return BinaryDefault.register(pandas.Series.ge)( self, @@ -709,6 +711,7 @@ def series_ge(self, other, **kwargs): def gt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs) + @doc_utils.add_refer_to("Series.gt") def series_gt(self, other, **kwargs): return BinaryDefault.register(pandas.Series.gt)( self, @@ -724,6 +727,7 @@ def series_gt(self, other, **kwargs): def le(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs) + @doc_utils.add_refer_to("Series.le") def series_le(self, other, **kwargs): return BinaryDefault.register(pandas.Series.le)( self, @@ -739,6 +743,7 @@ def series_le(self, other, **kwargs): def lt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs) + @doc_utils.add_refer_to("Series.lt") def series_lt(self, other, **kwargs): return BinaryDefault.register(pandas.Series.lt)( self, @@ -863,6 +868,7 @@ def dot(self, other, **kwargs): # noqa: PR02 def ne(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs) + @doc_utils.add_refer_to("Series.ne") def series_ne(self, other, **kwargs): return BinaryDefault.register(pandas.Series.ne)( self, From 4a066e1ae2a4fcae5a5978b4041a9d87fafe0832 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 14:29:48 -0700 Subject: [PATCH 6/9] fix docstrings again --- modin/core/storage_formats/base/doc_utils.py | 8 ++++++ .../storage_formats/base/query_compiler.py | 28 +++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/modin/core/storage_formats/base/doc_utils.py b/modin/core/storage_formats/base/doc_utils.py index ce0f3935731..6ba7a98a7f7 100644 --- a/modin/core/storage_formats/base/doc_utils.py +++ b/modin/core/storage_formats/base/doc_utils.py @@ -193,6 +193,14 @@ def doc_binary_method(operation, sign, self_on_right=False, op_type="arithmetic" fill_value : float or None Value to fill missing elements during frame alignment. """, + "series_comparison": """ + level : int or label + In case of MultiIndex match index values on the passed level. + fill_value : float or None + Value to fill missing elements during frame alignment. + axis : {{0, 1}} + Unused. Parameter needed for compatibility with DataFrame. + """, } verbose_substitution = ( diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 5566a4570d8..925d8bec866 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -647,7 +647,9 @@ def combine_first(self, other, **kwargs): # noqa: PR02 def eq(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs) - @doc_utils.add_refer_to("Series.eq") + @doc_utils.doc_binary_method( + operation="equality comparison", sign="==", op_type="series_comparison" + ) def series_eq(self, other, **kwargs): return BinaryDefault.register(pandas.Series.eq)( self, @@ -695,7 +697,11 @@ def divmod(self, other, **kwargs): def ge(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs) - @doc_utils.add_refer_to("Series.ge") + @doc_utils.doc_binary_method( + operation="greater than or equal comparison", + sign=">=", + op_type="series_comparison", + ) def series_ge(self, other, **kwargs): return BinaryDefault.register(pandas.Series.ge)( self, @@ -711,7 +717,9 @@ def series_ge(self, other, **kwargs): def gt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs) - @doc_utils.add_refer_to("Series.gt") + @doc_utils.doc_binary_method( + operation="greater than comparison", sign=">", op_type="series_comparison" + ) def series_gt(self, other, **kwargs): return BinaryDefault.register(pandas.Series.gt)( self, @@ -727,7 +735,11 @@ def series_gt(self, other, **kwargs): def le(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs) - @doc_utils.add_refer_to("Series.le") + @doc_utils.doc_binary_method( + operation="less than or equal comparison", + sign="<=", + op_type="series_comparison", + ) def series_le(self, other, **kwargs): return BinaryDefault.register(pandas.Series.le)( self, @@ -743,7 +755,9 @@ def series_le(self, other, **kwargs): def lt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs) - @doc_utils.add_refer_to("Series.lt") + @doc_utils.doc_binary_method( + operation="less than", sign="<", op_type="series_comparison" + ) def series_lt(self, other, **kwargs): return BinaryDefault.register(pandas.Series.lt)( self, @@ -868,7 +882,9 @@ def dot(self, other, **kwargs): # noqa: PR02 def ne(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs) - @doc_utils.add_refer_to("Series.ne") + @doc_utils.doc_binary_method( + operation="not equal comparison", sign="!=", op_type="series_comparison" + ) def series_ne(self, other, **kwargs): return BinaryDefault.register(pandas.Series.ne)( self, From ae5b571e4d43c9ef13effa249871deac36aabe02 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 14:36:06 -0700 Subject: [PATCH 7/9] suppress PR02 --- modin/core/storage_formats/base/query_compiler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 925d8bec866..fdd387a59c4 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -650,7 +650,7 @@ def eq(self, other, **kwargs): # noqa: PR02 @doc_utils.doc_binary_method( operation="equality comparison", sign="==", op_type="series_comparison" ) - def series_eq(self, other, **kwargs): + def series_eq(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.Series.eq)( self, other=other, @@ -702,7 +702,7 @@ def ge(self, other, **kwargs): # noqa: PR02 sign=">=", op_type="series_comparison", ) - def series_ge(self, other, **kwargs): + def series_ge(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.Series.ge)( self, other=other, @@ -720,7 +720,7 @@ def gt(self, other, **kwargs): # noqa: PR02 @doc_utils.doc_binary_method( operation="greater than comparison", sign=">", op_type="series_comparison" ) - def series_gt(self, other, **kwargs): + def series_gt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.Series.gt)( self, other=other, @@ -740,7 +740,7 @@ def le(self, other, **kwargs): # noqa: PR02 sign="<=", op_type="series_comparison", ) - def series_le(self, other, **kwargs): + def series_le(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.Series.le)( self, other=other, @@ -758,7 +758,7 @@ def lt(self, other, **kwargs): # noqa: PR02 @doc_utils.doc_binary_method( operation="less than", sign="<", op_type="series_comparison" ) - def series_lt(self, other, **kwargs): + def series_lt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.Series.lt)( self, other=other, @@ -885,7 +885,7 @@ def ne(self, other, **kwargs): # noqa: PR02 @doc_utils.doc_binary_method( operation="not equal comparison", sign="!=", op_type="series_comparison" ) - def series_ne(self, other, **kwargs): + def series_ne(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.Series.ne)( self, other=other, From 9116bbd1e01c955ffb9a189bde2484a711f31b3c Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 16:12:48 -0700 Subject: [PATCH 8/9] use squeeze_other in qc --- .../storage_formats/base/query_compiler.py | 12 +++--- .../storage_formats/pandas/query_compiler.py | 2 +- modin/pandas/series.py | 42 ++++++++++++++++--- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index fdd387a59c4..965fb98efb3 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -655,7 +655,7 @@ def series_eq(self, other, **kwargs): # noqa: PR02 self, other=other, squeeze_self=True, - squeeze_other=isinstance(other, pandas.DataFrame), + squeeze_other=kwargs.pop("squeeze_other", False), **kwargs, ) @@ -707,7 +707,7 @@ def series_ge(self, other, **kwargs): # noqa: PR02 self, other=other, squeeze_self=True, - squeeze_other=isinstance(other, pandas.DataFrame), + squeeze_other=kwargs.pop("squeeze_other", False), **kwargs, ) @@ -725,7 +725,7 @@ def series_gt(self, other, **kwargs): # noqa: PR02 self, other=other, squeeze_self=True, - squeeze_other=isinstance(other, pandas.DataFrame), + squeeze_other=kwargs.pop("squeeze_other", False), **kwargs, ) @@ -745,7 +745,7 @@ def series_le(self, other, **kwargs): # noqa: PR02 self, other=other, squeeze_self=True, - squeeze_other=isinstance(other, pandas.DataFrame), + squeeze_other=kwargs.pop("squeeze_other", False), **kwargs, ) @@ -763,7 +763,7 @@ def series_lt(self, other, **kwargs): # noqa: PR02 self, other=other, squeeze_self=True, - squeeze_other=isinstance(other, pandas.DataFrame), + squeeze_other=kwargs.pop("squeeze_other", False), **kwargs, ) @@ -890,7 +890,7 @@ def series_ne(self, other, **kwargs): # noqa: PR02 self, other=other, squeeze_self=True, - squeeze_other=isinstance(other, pandas.DataFrame), + squeeze_other=kwargs.pop("squeeze_other", False), **kwargs, ) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 602058f9690..655d427de1d 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -268,7 +268,7 @@ def _series_logical_binop(func): """ return lambda x, y, **kwargs: func( x.squeeze(axis=1), - y.squeeze(axis=1) if isinstance(y, pandas.DataFrame) else y, + y.squeeze(axis=1) if kwargs.pop("squeeze_other", False) else y, **kwargs, ).to_frame() diff --git a/modin/pandas/series.py b/modin/pandas/series.py index fc17feca4c8..9e52f113176 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1032,7 +1032,12 @@ def eq( """ new_self, new_other = self._prepare_inter_op(other) return new_self._binary_op( - "eq", new_other, level=level, fill_value=fill_value, axis=axis + "eq", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, (pandas.Series, Series)), ) def equals(self, other) -> bool: # noqa: PR01, RT01, D200 @@ -1143,7 +1148,12 @@ def ge( """ new_self, new_other = self._prepare_inter_op(other) return new_self._binary_op( - "ge", new_other, level=level, fill_value=fill_value, axis=axis + "ge", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, (pandas.Series, Series)), ) def groupby( @@ -1193,7 +1203,12 @@ def gt( """ new_self, new_other = self._prepare_inter_op(other) return new_self._binary_op( - "gt", new_other, level=level, fill_value=fill_value, axis=axis + "gt", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, (pandas.Series, Series)), ) def hist( @@ -1313,7 +1328,12 @@ def le( """ new_self, new_other = self._prepare_inter_op(other) return new_self._binary_op( - "le", new_other, level=level, fill_value=fill_value, axis=axis + "le", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, (pandas.Series, Series)), ) def lt( @@ -1324,7 +1344,12 @@ def lt( """ new_self, new_other = self._prepare_inter_op(other) return new_self._binary_op( - "lt", new_other, level=level, fill_value=fill_value, axis=axis + "lt", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, (pandas.Series, Series)), ) def map(self, arg, na_action=None) -> Series: # noqa: PR01, RT01, D200 @@ -1453,7 +1478,12 @@ def ne( """ new_self, new_other = self._prepare_inter_op(other) return new_self._binary_op( - "ne", new_other, level=level, fill_value=fill_value, axis=axis + "ne", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, (pandas.Series, Series)), ) def nlargest(self, n=5, keep="first") -> Series: # noqa: PR01, RT01, D200 From 2418b050dbefb65c6de47e7278bd7a660074b96b Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Mon, 16 Sep 2024 15:01:06 -0700 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Anatoly Myachev --- modin/pandas/series.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 9e52f113176..b4b630aa1de 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1037,7 +1037,7 @@ def eq( level=level, fill_value=fill_value, axis=axis, - squeeze_other=isinstance(other, (pandas.Series, Series)), + squeeze_other=isinstance(other, Series), ) def equals(self, other) -> bool: # noqa: PR01, RT01, D200 @@ -1153,7 +1153,7 @@ def ge( level=level, fill_value=fill_value, axis=axis, - squeeze_other=isinstance(other, (pandas.Series, Series)), + squeeze_other=isinstance(other, Series), ) def groupby( @@ -1208,7 +1208,7 @@ def gt( level=level, fill_value=fill_value, axis=axis, - squeeze_other=isinstance(other, (pandas.Series, Series)), + squeeze_other=isinstance(other, Series), ) def hist( @@ -1333,7 +1333,7 @@ def le( level=level, fill_value=fill_value, axis=axis, - squeeze_other=isinstance(other, (pandas.Series, Series)), + squeeze_other=isinstance(other, Series), ) def lt( @@ -1349,7 +1349,7 @@ def lt( level=level, fill_value=fill_value, axis=axis, - squeeze_other=isinstance(other, (pandas.Series, Series)), + squeeze_other=isinstance(other, Series), ) def map(self, arg, na_action=None) -> Series: # noqa: PR01, RT01, D200 @@ -1483,7 +1483,7 @@ def ne( level=level, fill_value=fill_value, axis=axis, - squeeze_other=isinstance(other, (pandas.Series, Series)), + squeeze_other=isinstance(other, Series), ) def nlargest(self, n=5, keep="first") -> Series: # noqa: PR01, RT01, D200