Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: define arithmetic methods non-dynamically #51813

Merged
merged 12 commits into from
Mar 8, 2023
16 changes: 14 additions & 2 deletions pandas/_libs/properties.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ from pandas._typing import (
Series,
)

# These cannot _really_ be just FrameOps/SeriesOps, as those are
# mixins to DataFrame/Series. We include those here so that the annotations
# in the mixin are correct.
from pandas.core.ops.methods import (
FrameOps,
SeriesOps,
)

# note: this is a lie to make type checkers happy (they special
# case property). cache_readonly uses attribute names similar to
# property (fget) but it does not provide fset and fdel.
Expand All @@ -19,9 +27,13 @@ class AxisProperty:
axis: int
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...
@overload
def __get__(self, obj: DataFrame | Series, type) -> Index: ...
def __get__(
self, obj: DataFrame | Series | SeriesOps | FrameOps, type
) -> Index: ...
@overload
def __get__(self, obj: None, type) -> AxisProperty: ...
def __set__(
self, obj: DataFrame | Series, value: AnyArrayLike | Sequence
self,
obj: DataFrame | Series | SeriesOps | FrameOps,
value: AnyArrayLike | Sequence,
) -> None: ...
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# for binary ops, use our custom dunder methods
result = ops.maybe_dispatch_ufunc_to_dunder_op(
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
return NotImplemented

# for binary ops, use our custom dunder methods
result = ops.maybe_dispatch_ufunc_to_dunder_op(
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# in PandasArray, since pandas' ExtensionArrays are 1-d.
out = kwargs.get("out", ())

result = ops.maybe_dispatch_ufunc_to_dunder_op(
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,7 @@
notna,
)

from pandas.core import (
arraylike,
ops,
)
from pandas.core import arraylike
import pandas.core.algorithms as algos
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
Expand Down Expand Up @@ -1643,7 +1640,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
return NotImplemented

# for binary ops, use our custom dunder methods
result = ops.maybe_dispatch_ufunc_to_dunder_op(
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@
)
from pandas.core.dtypes.missing import isna

from pandas.core import nanops
from pandas.core import (
nanops,
roperator,
)
from pandas.core.array_algos import datetimelike_accumulations
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_regular_range
import pandas.core.common as com
from pandas.core.ops import roperator
from pandas.core.ops.common import unpack_zerodim_and_defer

if TYPE_CHECKING:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from pandas.util._exceptions import find_stack_level

from pandas.core import roperator
from pandas.core.computation.check import NUMEXPR_INSTALLED
from pandas.core.ops import roperator

if NUMEXPR_INSTALLED:
import numexpr as ne
Expand Down
165 changes: 7 additions & 158 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@
algorithms,
common as com,
nanops,
ops,
)
from pandas.core.accessor import CachedAccessor
from pandas.core.apply import (
Expand Down Expand Up @@ -172,6 +171,7 @@
treat_as_nested,
)
from pandas.core.methods import selectn
from pandas.core.ops.methods import FrameOps
from pandas.core.reshape.melt import melt
from pandas.core.series import Series
from pandas.core.shared_docs import _shared_docs
Expand Down Expand Up @@ -488,7 +488,8 @@
# DataFrame class


class DataFrame(NDFrame, OpsMixin):
# Cannot override writable attribute "_get_axis_number" with a final one
class DataFrame(NDFrame, FrameOps, OpsMixin): # type: ignore[misc]
"""
Two-dimensional, size-mutable, potentially heterogeneous tabular data.

Expand Down Expand Up @@ -631,7 +632,8 @@ class DataFrame(NDFrame, OpsMixin):
_mgr: BlockManager | ArrayManager

@property
def _constructor(self) -> Callable[..., DataFrame]:
# error: Cannot override writeable attribute with read-only property
def _constructor(self) -> Callable[..., DataFrame]: # type: ignore[override]
return DataFrame

_constructor_sliced: Callable[..., Series] = Series
Expand Down Expand Up @@ -905,7 +907,8 @@ def axes(self) -> list[Index]:
return [self.index, self.columns]

@property
def shape(self) -> tuple[int, int]:
# error: Cannot override writeable attribute with read-only property
def shape(self) -> tuple[int, int]: # type: ignore[override]
"""
Return a tuple representing the dimensionality of the DataFrame.

Expand Down Expand Up @@ -7447,158 +7450,6 @@ class diet
result.columns = result.columns.reorder_levels(order)
return result

# ----------------------------------------------------------------------
# Arithmetic Methods

def _cmp_method(self, other, op):
axis: Literal[1] = 1 # only relevant for Series other case

self, other = ops.align_method_FRAME(self, other, axis, flex=False, level=None)

# See GH#4537 for discussion of scalar op behavior
new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)

def _arith_method(self, other, op):
if ops.should_reindex_frame_op(self, other, op, 1, None, None):
return ops.frame_arith_method_with_reindex(self, other, op)

axis: Literal[1] = 1 # only relevant for Series other case
other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],))

self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None)

new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)

_logical_method = _arith_method

def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.

Parameters
----------
right : scalar, Series, or DataFrame
func : arithmetic or comparison operator
axis : {None, 0, 1}

Returns
-------
DataFrame
"""
# Get the appropriate array-op to apply to each column/block's values.
array_op = ops.get_array_op(func)

right = lib.item_from_zerodim(right)
if not is_list_like(right):
# i.e. scalar, faster than checking np.ndim(right) == 0
with np.errstate(all="ignore"):
bm = self._mgr.apply(array_op, right=right)
return self._constructor(bm)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
assert self.columns.equals(right.columns)
# TODO: The previous assertion `assert right._indexed_same(self)`
# fails in cases with empty columns reached via
# _frame_arith_method_with_reindex

# TODO operate_blockwise expects a manager of the same type
with np.errstate(all="ignore"):
bm = self._mgr.operate_blockwise(
# error: Argument 1 to "operate_blockwise" of "ArrayManager" has
# incompatible type "Union[ArrayManager, BlockManager]"; expected
# "ArrayManager"
# error: Argument 1 to "operate_blockwise" of "BlockManager" has
# incompatible type "Union[ArrayManager, BlockManager]"; expected
# "BlockManager"
right._mgr, # type: ignore[arg-type]
array_op,
)
return self._constructor(bm)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
assert right.index.equals(self.columns)

right = right._values
# maybe_align_as_frame ensures we do not have an ndarray here
assert not isinstance(right, np.ndarray)

with np.errstate(all="ignore"):
arrays = [
array_op(_left, _right)
for _left, _right in zip(self._iter_column_arrays(), right)
]

elif isinstance(right, Series):
assert right.index.equals(self.index) # Handle other cases later
right = right._values

with np.errstate(all="ignore"):
arrays = [array_op(left, right) for left in self._iter_column_arrays()]

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

return type(self)._from_arrays(
arrays, self.columns, self.index, verify_integrity=False
)

def _combine_frame(self, other: DataFrame, func, fill_value=None):
# at this point we have `self._indexed_same(other)`

if fill_value is None:
# since _arith_op may be called in a loop, avoid function call
# overhead if possible by doing this check once
_arith_op = func

else:

def _arith_op(left, right):
# for the mixed_type case where we iterate over columns,
# _arith_op(left, right) is equivalent to
# left._binop(right, func, fill_value=fill_value)
left, right = ops.fill_binop(left, right, fill_value)
return func(left, right)

new_data = self._dispatch_frame_op(other, _arith_op)
return new_data

def _construct_result(self, result) -> DataFrame:
"""
Wrap the result of an arithmetic, comparison, or logical operation.

Parameters
----------
result : DataFrame

Returns
-------
DataFrame
"""
out = self._constructor(result, copy=False).__finalize__(self)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
out.columns = self.columns
out.index = self.index
return out

def __divmod__(self, other) -> tuple[DataFrame, DataFrame]:
# Naive implementation, room for optimization
div = self // other
mod = self - div * other
return div, mod

def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]:
# Naive implementation, room for optimization
div = other // self
mod = other - div * self
return div, mod

# ----------------------------------------------------------------------
# Combination-Related

Expand Down Expand Up @@ -11613,8 +11464,6 @@ def mask(

DataFrame._add_numeric_operations()

ops.add_flex_arithmetic_methods(DataFrame)


def _from_nested_dict(data) -> collections.defaultdict:
new_data: collections.defaultdict = collections.defaultdict(dict)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@
clean_reindex_fill_method,
find_valid_index,
)
from pandas.core.ops import align_method_FRAME
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs
from pandas.core.sorting import get_indexer_indexer
Expand Down Expand Up @@ -8069,7 +8068,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
if isinstance(self, ABCSeries):
threshold = self._constructor(threshold, index=self.index)
else:
threshold = align_method_FRAME(self, threshold, axis, flex=None)[1]
threshold = self._align_for_op(threshold, axis, flex=None)[1]

# GH 40420
# Treat missing thresholds as no bounds, not clipping the values
Expand Down
Loading