-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF: Simplify quantile, remove reduction from BlockManager #24597
Changes from 6 commits
4056ed2
ee695ec
30543bf
d26f773
379fdde
319a77b
b5bf497
11af1dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
maybe_promote) | ||
from pandas.core.dtypes.common import ( | ||
_NS_DTYPE, is_datetimelike_v_numeric, is_extension_array_dtype, | ||
is_extension_type, is_numeric_v_string_like, is_scalar) | ||
is_extension_type, is_list_like, is_numeric_v_string_like, is_scalar) | ||
import pandas.core.dtypes.concat as _concat | ||
from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
@@ -402,34 +402,47 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, | |
bm._consolidate_inplace() | ||
return bm | ||
|
||
def reduction(self, f, axis=0, consolidate=True, transposed=False, | ||
**kwargs): | ||
def quantile(self, axis=0, consolidate=True, transposed=False, | ||
interpolation='linear', qs=None, numeric_only=None): | ||
""" | ||
iterate over the blocks, collect and create a new block manager. | ||
Iterate over blocks applying quantile reduction. | ||
This routine is intended for reduction type operations and | ||
will do inference on the generated blocks. | ||
|
||
Parameters | ||
---------- | ||
f: the callable or function name to operate on at the block level | ||
axis: reduction axis, default 0 | ||
consolidate: boolean, default True. Join together blocks having same | ||
dtype | ||
transposed: boolean, default False | ||
we are holding transposed data | ||
interpolation : type of interpolation, default 'linear' | ||
qs : a scalar or list of the quantiles to be computed | ||
numeric_only : ignored | ||
|
||
Returns | ||
------- | ||
Block Manager (new object) | ||
|
||
""" | ||
|
||
if consolidate: | ||
self._consolidate_inplace() | ||
|
||
def get_axe(block, qs, axes): | ||
from pandas import Float64Index | ||
if is_list_like(qs): | ||
ax = Float64Index(qs) | ||
elif block.ndim == 1: | ||
ax = Float64Index([qs]) | ||
else: | ||
ax = axes[0] | ||
return ax | ||
|
||
axes, blocks = [], [] | ||
for b in self.blocks: | ||
axe, block = getattr(b, f)(axis=axis, axes=self.axes, **kwargs) | ||
block = b.quantile(axis=axis, qs=qs, interpolation=interpolation) | ||
|
||
axe = get_axe(b, qs, axes=self.axes) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also doesn't need / take the block arg There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it uses the block arg There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no i mean get_axe doesn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (which is why it is a function instead of just done once outside the loop). I'd rather keep it as a function than in-line it, but not a deal-breaker. There is another PR after this that will be ripping out a bunch of code regardless. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
line 435 inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. grr, ok, i c now |
||
|
||
axes.append(axe) | ||
blocks.append(block) | ||
|
@@ -496,9 +509,6 @@ def isna(self, func, **kwargs): | |
def where(self, **kwargs): | ||
return self.apply('where', **kwargs) | ||
|
||
def quantile(self, **kwargs): | ||
return self.reduction('quantile', **kwargs) | ||
|
||
def setitem(self, **kwargs): | ||
return self.apply('setitem', **kwargs) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1194,3 +1194,75 @@ def f(x, y): | |
nanle = make_nancomp(operator.le) | ||
naneq = make_nancomp(operator.eq) | ||
nanne = make_nancomp(operator.ne) | ||
|
||
|
||
def _nanpercentile1D(values, mask, q, na_value, interpolation): | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Wrapper for np.percentile that skips missing values, specialized to | ||
1-dimensional case. | ||
|
||
Parameters | ||
---------- | ||
values : array over which to find quantiles | ||
mask : ndarray[bool] | ||
locations in values that should be considered missing | ||
q : scalar or array of quantile indices to find | ||
na_value : scalar | ||
value to return for empty or all-null values | ||
interpolation : str | ||
|
||
Returns | ||
------- | ||
quantiles : scalar or array | ||
""" | ||
# mask is Union[ExtensionArray, ndarray] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add doc-strings There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just pushed with docstrings. We're going to simplify the tar out of these methods if/when #24600 gets fixed. |
||
values = values[~mask] | ||
|
||
if len(values) == 0: | ||
if lib.is_scalar(q): | ||
return na_value | ||
else: | ||
return np.array([na_value] * len(q), | ||
dtype=values.dtype) | ||
|
||
return np.percentile(values, q, interpolation=interpolation) | ||
|
||
|
||
def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): | ||
""" | ||
Wrapper for np.percentile that skips missing values. | ||
|
||
Parameters | ||
---------- | ||
values : array over which to find quantiles | ||
q : scalar or array of quantile indices to find | ||
axis : {0, 1} | ||
na_value : scalar | ||
value to return for empty or all-null values | ||
mask : ndarray[bool] | ||
locations in values that should be considered missing | ||
ndim : {1, 2} | ||
interpolation : str | ||
|
||
Returns | ||
------- | ||
quantiles : scalar or array | ||
""" | ||
if not lib.is_scalar(mask) and mask.any(): | ||
if ndim == 1: | ||
return _nanpercentile1D(values, mask, q, na_value, | ||
interpolation=interpolation) | ||
else: | ||
# for nonconsolidatable blocks mask is 1D, but values 2D | ||
if mask.ndim < values.ndim: | ||
mask = mask.reshape(values.shape) | ||
if axis == 0: | ||
values = values.T | ||
mask = mask.T | ||
result = [_nanpercentile1D(val, m, q, na_value, | ||
interpolation=interpolation) | ||
for (val, m) in zip(list(values), list(mask))] | ||
result = np.array(result, dtype=values.dtype, copy=False).T | ||
return result | ||
else: | ||
return np.percentile(values, q, axis=axis, interpolation=interpolation) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i don't think this adds anything to make it a function