-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ArrayManager] Array version of putmask logic #44396
Changes from all commits
0e612c2
51a932e
a9f520a
4193142
ad113c1
ee29fed
3c9a8a7
2a4e23e
cf34860
7f3a9a7
78017a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,6 +89,7 @@ | |
new_block, | ||
to_native_types, | ||
) | ||
from pandas.core.internals.methods import putmask_flexible | ||
|
||
if TYPE_CHECKING: | ||
from pandas import Float64Index | ||
|
@@ -190,7 +191,7 @@ def __repr__(self) -> str: | |
def apply( | ||
self: T, | ||
f, | ||
align_keys: list[str] | None = None, | ||
align_keys: list[str] | None = None, # not used for ArrayManager | ||
ignore_failures: bool = False, | ||
**kwargs, | ||
) -> T: | ||
|
@@ -201,7 +202,6 @@ def apply( | |
---------- | ||
f : str or callable | ||
Name of the Array method to apply. | ||
align_keys: List[str] or None, default None | ||
ignore_failures: bool, default False | ||
**kwargs | ||
Keywords to pass to `f` | ||
|
@@ -212,32 +212,14 @@ def apply( | |
""" | ||
assert "filter" not in kwargs | ||
|
||
align_keys = align_keys or [] | ||
result_arrays: list[np.ndarray] = [] | ||
result_indices: list[int] = [] | ||
# fillna: Series/DataFrame is responsible for making sure value is aligned | ||
|
||
aligned_args = {k: kwargs[k] for k in align_keys} | ||
|
||
if f == "apply": | ||
f = kwargs.pop("func") | ||
|
||
for i, arr in enumerate(self.arrays): | ||
|
||
if aligned_args: | ||
|
||
for k, obj in aligned_args.items(): | ||
if isinstance(obj, (ABCSeries, ABCDataFrame)): | ||
# The caller is responsible for ensuring that | ||
# obj.axes[-1].equals(self.items) | ||
if obj.ndim == 1: | ||
kwargs[k] = obj.iloc[i] | ||
else: | ||
kwargs[k] = obj.iloc[:, i]._values | ||
else: | ||
# otherwise we have an array-like | ||
kwargs[k] = obj[i] | ||
|
||
try: | ||
if callable(f): | ||
applied = f(arr, **kwargs) | ||
|
@@ -352,12 +334,28 @@ def putmask(self, mask, new, align: bool = True): | |
align_keys = ["mask"] | ||
new = extract_array(new, extract_numpy=True) | ||
|
||
return self.apply_with_block( | ||
"putmask", | ||
align_keys=align_keys, | ||
mask=mask, | ||
new=new, | ||
) | ||
kwargs = {"mask": mask, "new": new} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it not feasible to go through 'apply'? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point. So I forgot that […]. But then I would rather remove it from […] |
||
aligned_kwargs = {k: kwargs[k] for k in align_keys} | ||
|
||
for i, arr in enumerate(self.arrays): | ||
|
||
for k, obj in aligned_kwargs.items(): | ||
if isinstance(obj, (ABCSeries, ABCDataFrame)): | ||
# The caller is responsible for ensuring that | ||
# obj.axes[-1].equals(self.items) | ||
if obj.ndim == 1: | ||
kwargs[k] = obj._values | ||
else: | ||
kwargs[k] = obj.iloc[:, i]._values | ||
else: | ||
# otherwise we have an ndarray | ||
if self.ndim == 2: | ||
kwargs[k] = obj[i] | ||
|
||
new = putmask_flexible(arr, **kwargs) | ||
self.arrays[i] = new | ||
|
||
return self | ||
|
||
def diff(self: T, n: int, axis: int) -> T: | ||
if axis == 1: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
""" | ||
Wrappers around array_algos with internals-specific logic | ||
""" | ||
from __future__ import annotations | ||
|
||
import numpy as np | ||
|
||
from pandas.core.dtypes.cast import ( | ||
can_hold_element, | ||
find_common_type, | ||
infer_dtype_from, | ||
) | ||
from pandas.core.dtypes.common import is_interval_dtype | ||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, | ||
ABCIndex, | ||
ABCSeries, | ||
) | ||
from pandas.core.dtypes.missing import ( | ||
is_valid_na_for_dtype, | ||
na_value_for_dtype, | ||
) | ||
|
||
from pandas.core.array_algos.putmask import ( | ||
extract_bool_array, | ||
putmask_smart, | ||
putmask_without_repeat, | ||
setitem_datetimelike_compat, | ||
validate_putmask, | ||
) | ||
from pandas.core.arrays import ExtensionArray | ||
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray | ||
|
||
|
||
def putmask_flexible(array: np.ndarray | ExtensionArray, mask, new): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is all of this logic from? This doesn't look like 'simple' wrappers to me. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's logic that, for the BlockManager, lives on the blocks (the Block.putmask version also calls those […]). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Great, can you then remove it from where it's used now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The BlockManager is still used ... I can try to see if there are some more parts that could be moved into […]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, so I could easily remove most of the logic of […]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let me put this another way: you are adding a ton of code to methods. Is this generally useful? Can we reuse it elsewhere? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
No, it is only for use within |
||
""" | ||
Putmask implementation for ArrayManager.putmask. | ||
|
||
Flexible version that will upcast if needed. | ||
""" | ||
if isinstance(array, np.ndarray): | ||
return putmask_flexible_ndarray(array, mask=mask, new=new) | ||
else: | ||
return putmask_flexible_ea(array, mask=mask, new=new) | ||
|
||
|
||
def putmask_flexible_ndarray(array: np.ndarray, mask, new):
    """
    Set ``new`` into ``array`` where ``mask`` is set, upcasting if needed.

    ndarray-specific putmask implementation used by ArrayManager.putmask.

    Parameters
    ----------
    array : np.ndarray
        Values to update.
    mask : array-like
        Passed through ``validate_putmask`` before use.
    new : scalar or array-like
        Replacement value(s); must not be an Index, Series or DataFrame.

    Returns
    -------
    np.ndarray
        ``array`` itself (written in place) when it can hold ``new`` or when
        ``validate_putmask`` reports a no-op; otherwise an object-dtype copy
        (datetimelike ``new``) or the result of ``putmask_smart``.
    """
    mask, noop = validate_putmask(array, mask)
    assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

    # A scalar NA (e.g. None) is replaced by the dtype's own NA value;
    # object arrays keep whatever was passed.
    if array.dtype != "object" and is_valid_na_for_dtype(new, array.dtype):
        new = na_value_for_dtype(array.dtype, compat=False)

    # Fast path: no dtype change required, write directly into ``array``.
    if can_hold_element(array, new):
        putmask_without_repeat(array, mask, new)
        return array

    if noop:
        return array

    inferred_dtype, _ = infer_dtype_from(new)
    if inferred_dtype.kind in ("m", "M"):
        as_object = array.astype(object)
        # convert to list to avoid numpy coercing datetimelikes to integers
        new = setitem_datetimelike_compat(as_object, mask.sum(), new)
        np.putmask(as_object, mask, new)
        return as_object

    return putmask_smart(array, mask, new)
|
||
|
||
def _coerce_to_target_dtype(array, new):
    """
    Cast ``array`` to the common dtype of its own dtype and that of ``new``.

    The dtype of ``new`` is inferred with ``infer_dtype_from`` (pandas dtypes
    allowed); the cast is requested with ``copy=False``.
    """
    inferred, _ = infer_dtype_from(new, pandas_dtype=True)
    common = find_common_type([array.dtype, inferred])
    return array.astype(common, copy=False)
|
||
|
||
def putmask_flexible_ea(array: ExtensionArray, mask, new):
    """
    Set ``new`` into ``array`` where ``mask`` is set, upcasting if needed.

    ExtensionArray-specific putmask implementation used by
    ArrayManager.putmask.

    Parameters
    ----------
    array : ExtensionArray
        Values to update.
    mask : array-like
        Converted with ``extract_bool_array``; reshaped to ``array.shape``
        when it has one more dimension than ``array``.
    new : scalar or array-like
        Replacement value(s).

    Returns
    -------
    ndarray or ExtensionArray
        ``array`` after ``array._putmask`` succeeded, or the result of
        retrying on a coerced array.

    Raises
    ------
    TypeError
        Re-raised from ``array._putmask`` when the dtype is not an interval
        dtype, or when coercion would end up at object dtype.
    """
    mask = extract_bool_array(mask)

    if mask.ndim == array.ndim + 1:
        # TODO(EA2D): unnecessary with 2D EAs
        mask = mask.reshape(array.shape)

    needs_upcast = isinstance(
        array, NDArrayBackedExtensionArray
    ) and not can_hold_element(array, new)
    if needs_upcast:
        upcast = _coerce_to_target_dtype(array, new)
        return putmask_flexible(upcast, mask, new)

    try:
        array._putmask(mask, new)
    except TypeError:
        if not is_interval_dtype(array.dtype):
            # Discussion about what we want to support in the general
            # case GH#39584
            raise

        array = _coerce_to_target_dtype(array, new)
        if array.dtype == np.dtype("object"):
            # For now at least, only support casting e.g.
            # Interval[int64]->Interval[float64],
            raise
        return putmask_flexible_ea(array, mask, new)
    else:
        return array
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not worth having a separate one for AM?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you mean exactly? A separate signature for ArrayManager?
I initially removed this from the signature (since it is not used), but this method is declared as an abstract method on the base manager class, so removing it here gives typing errors.