-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF: implement scalar ops blockwise #29853
Changes from 15 commits
15f0caa
08a43f0
a765069
c81ea13
c2f6129
016ae64
4536097
798ce75
1fc1e3e
657d1bb
66d34c2
0f26775
a0e4adc
23d5c48
2228f5e
e230cea
2f80502
31607c0
0ec7e74
cf94d13
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -340,13 +340,13 @@ def _verify_integrity(self): | |
f"tot_items: {tot_items}" | ||
) | ||
|
||
def apply(self, f: str, filter=None, **kwargs): | ||
def apply(self, f, filter=None, **kwargs): | ||
""" | ||
Iterate over the blocks, collect and create a new BlockManager. | ||
|
||
Parameters | ||
---------- | ||
f : str | ||
f : str or callable | ||
Name of the Block method to apply. | ||
filter : list, if supplied, only call the block if the filter is in | ||
the block | ||
|
@@ -411,7 +411,10 @@ def apply(self, f: str, filter=None, **kwargs): | |
axis = obj._info_axis_number | ||
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy) | ||
|
||
applied = getattr(b, f)(**kwargs) | ||
if callable(f): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this strictly necessary? Meaning, I'm happy to require only callables here (it would require some changes). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All of our existing usages pass strings here to get at Block methods. I think @WillAyd had a suggestion about re-working Block.apply to do the str vs. callable handling there; that should be its own PR. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, yeah, this whole section could use some TLC. |
||
applied = b.apply(f, **kwargs) | ||
else: | ||
applied = getattr(b, f)(**kwargs) | ||
result_blocks = _extend_blocks(applied, result_blocks) | ||
|
||
if len(result_blocks) == 0: | ||
|
@@ -741,7 +744,7 @@ def copy(self, deep=True): | |
|
||
Parameters | ||
---------- | ||
deep : boolean o rstring, default True | ||
deep : boolean or string, default True | ||
If False, return shallow copy (do not copy data) | ||
If 'all', copy data and a deep copy of the index | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
arithmetic_op, | ||
comparison_op, | ||
define_na_arithmetic_op, | ||
get_array_op, | ||
logical_op, | ||
) | ||
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 | ||
|
@@ -372,6 +373,10 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): | |
right = lib.item_from_zerodim(right) | ||
if lib.is_scalar(right) or np.ndim(right) == 0: | ||
|
||
array_op = get_array_op(func, str_rep=str_rep) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a comment here explaining what is going on? |
||
bm = left._data.apply(array_op, right=right) | ||
return type(left)(bm) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could just be an `if` (since you are returning), e.g. change the following `elif` to an `if`, but it's no big deal. |
||
def column_op(a, b): | ||
return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} | ||
|
||
|
@@ -713,7 +718,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): | |
if fill_value is not None: | ||
self = self.fillna(fill_value) | ||
|
||
new_data = dispatch_to_series(self, other, op) | ||
new_data = dispatch_to_series(self, other, op, str_rep) | ||
return self._construct_result(new_data) | ||
|
||
f.__name__ = op_name | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
Functions for arithmetic and comparison operations on NumPy arrays and | ||
ExtensionArrays. | ||
""" | ||
from functools import partial | ||
import operator | ||
from typing import Any, Union | ||
|
||
|
@@ -51,10 +52,10 @@ def comp_method_OBJECT_ARRAY(op, x, y): | |
if isinstance(y, (ABCSeries, ABCIndex)): | ||
y = y.values | ||
|
||
result = libops.vec_compare(x, y, op) | ||
result = libops.vec_compare(x.ravel(), y, op) | ||
else: | ||
result = libops.scalar_compare(x, y, op) | ||
return result | ||
result = libops.scalar_compare(x.ravel(), y, op) | ||
return result.reshape(x.shape) | ||
|
||
|
||
def masked_arith_op(x, y, op): | ||
|
@@ -237,9 +238,9 @@ def comparison_op( | |
elif is_scalar(rvalues) and isna(rvalues): | ||
# numpy does not like comparisons vs None | ||
if op is operator.ne: | ||
res_values = np.ones(len(lvalues), dtype=bool) | ||
res_values = np.ones(lvalues.shape, dtype=bool) | ||
else: | ||
res_values = np.zeros(len(lvalues), dtype=bool) | ||
res_values = np.zeros(lvalues.shape, dtype=bool) | ||
|
||
elif is_object_dtype(lvalues.dtype): | ||
res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) | ||
|
@@ -367,3 +368,13 @@ def fill_bool(x, left=None): | |
res_values = filler(res_values) # type: ignore | ||
|
||
return res_values | ||
|
||
|
||
def get_array_op(op, str_rep=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a docstring explaining what this is doing? |
||
op_name = op.__name__.strip("_") | ||
if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: | ||
return partial(comparison_op, op=op) | ||
elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: | ||
return partial(logical_op, op=op) | ||
else: | ||
return partial(arithmetic_op, op=op, str_rep=str_rep) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could be an `elif` here that re-assigns to `result`, just to make the flow more natural. Alternatively, this could be made into a method on BM. But that's for follow-ups.