-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: 2D support for MaskedArray #38992
Changes from all commits
ae51cff
f608792
125606b
dd5dbbe
577826c
17f63d4
33b2d78
a2bd7b1
3f14fa3
6600588
560279c
553038c
b2a26bf
8a40d59
44999d1
7a6c226
638bd9c
aca12e6
f27f8c0
3810660
f0957b3
f538868
6664d0d
2792724
061a53c
6032ed1
543258d
e96ec33
6ca7f01
6f26c4b
3dedb8f
34fda97
2a108ba
2c99e59
ee9c3a0
efd0071
3839b98
0956993
4b75101
639fc23
2989efc
81cd3e4
8f315bc
21cf578
6f215d6
e68c797
17dd19a
93d65eb
3bfe60c
5c28d69
92d710b
5b014c1
db76ca0
8148fcd
15a533f
7a7601e
7c6baaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -637,7 +637,7 @@ def pad_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): | |
|
||
@cython.boundscheck(False) | ||
@cython.wraparound(False) | ||
def pad_2d_inplace(numeric_object_t[:, :] values, const uint8_t[:, :] mask, limit=None): | ||
def pad_2d_inplace(numeric_object_t[:, :] values, uint8_t[:, :] mask, limit=None): | ||
cdef: | ||
Py_ssize_t i, j, N, K | ||
numeric_object_t val | ||
|
@@ -656,10 +656,11 @@ def pad_2d_inplace(numeric_object_t[:, :] values, const uint8_t[:, :] mask, limi | |
val = values[j, 0] | ||
for i in range(N): | ||
if mask[j, i]: | ||
if fill_count >= lim: | ||
if fill_count >= lim or i == 0: | ||
continue | ||
fill_count += 1 | ||
values[j, i] = val | ||
mask[j, i] = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mask should be previous mask... see pad_inplace #39953
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so fixing this on my numba branch results in... @numba.njit
def _pad_2d_inplace(values, mask, limit=None):
if values.shape[1]:
K, N = values.shape
if limit is None:
for j in range(K):
val, prev_mask = values[j, 0], mask[j, 0]
for i in range(N):
if mask[j, i]:
values[j, i], mask[j, i] = val, prev_mask
else:
val, prev_mask = values[j, i], mask[j, i]
else:
for j in range(K):
fill_count = 0
val, prev_mask = values[j, 0], mask[j, 0]
for i in range(N):
if mask[j, i]:
if fill_count >= limit:
continue
fill_count += 1
values[j, i], mask[j, i] = val, prev_mask
else:
fill_count = 0
val, prev_mask = values[j, i], mask[j, i] I have some duplication here but a perf improvement for the common case of no limit, the duplication can probably be mitigated by reshaping a 1d array and removing the 1d version pad_inplace There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might look into a variant using 2 loops, the first to find the first not missing value. and the second to fill without tracking the previous mask, then we could just do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One more thing while in the neighborhood, I think |
||
else: | ||
fill_count = 0 | ||
val = values[j, i] | ||
|
@@ -759,7 +760,7 @@ def backfill_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): | |
|
||
|
||
def backfill_2d_inplace(numeric_object_t[:, :] values, | ||
const uint8_t[:, :] mask, | ||
uint8_t[:, :] mask, | ||
limit=None): | ||
pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,10 @@ | |
for missing values. | ||
""" | ||
|
||
from typing import Callable | ||
from typing import ( | ||
Callable, | ||
Optional, | ||
) | ||
|
||
import numpy as np | ||
|
||
|
@@ -19,6 +22,7 @@ def _sumprod( | |
*, | ||
skipna: bool = True, | ||
min_count: int = 0, | ||
axis: Optional[int] = None, | ||
): | ||
""" | ||
Sum or product for 1D masked array. | ||
|
@@ -36,36 +40,55 @@ def _sumprod( | |
min_count : int, default 0 | ||
The required number of valid values to perform the operation. If fewer than | ||
``min_count`` non-NA values are present the result will be NA. | ||
axis : int, optional, default None | ||
""" | ||
if not skipna: | ||
if mask.any() or check_below_min_count(values.shape, None, min_count): | ||
if mask.any(axis=axis) or check_below_min_count(values.shape, None, min_count): | ||
return libmissing.NA | ||
else: | ||
return func(values) | ||
return func(values, axis=axis) | ||
else: | ||
if check_below_min_count(values.shape, mask, min_count): | ||
if check_below_min_count(values.shape, mask, min_count) and ( | ||
axis is None or values.ndim == 1 | ||
): | ||
return libmissing.NA | ||
return func(values, where=~mask) | ||
|
||
return func(values, where=~mask, axis=axis) | ||
|
||
|
||
def sum( | ||
values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0 | ||
values: np.ndarray, | ||
mask: np.ndarray, | ||
*, | ||
skipna: bool = True, | ||
min_count: int = 0, | ||
axis: Optional[int] = None, | ||
): | ||
return _sumprod( | ||
np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count | ||
np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis | ||
) | ||
|
||
|
||
def prod( | ||
values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0 | ||
values: np.ndarray, | ||
mask: np.ndarray, | ||
*, | ||
skipna: bool = True, | ||
min_count: int = 0, | ||
axis: Optional[int] = None, | ||
): | ||
return _sumprod( | ||
np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count | ||
np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis | ||
) | ||
|
||
|
||
def _minmax( | ||
func: Callable, values: np.ndarray, mask: np.ndarray, *, skipna: bool = True | ||
func: Callable, | ||
values: np.ndarray, | ||
mask: np.ndarray, | ||
*, | ||
skipna: bool = True, | ||
axis: Optional[int] = None, | ||
): | ||
""" | ||
Reduction for 1D masked array. | ||
|
@@ -80,6 +103,7 @@ def _minmax( | |
Boolean numpy array (True values indicate missing values). | ||
skipna : bool, default True | ||
Whether to skip NA. | ||
axis : int, optional, default None | ||
""" | ||
if not skipna: | ||
if mask.any() or not values.size: | ||
|
@@ -96,14 +120,27 @@ def _minmax( | |
return libmissing.NA | ||
|
||
|
||
def min(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): | ||
return _minmax(np.min, values=values, mask=mask, skipna=skipna) | ||
def min( | ||
values: np.ndarray, | ||
mask: np.ndarray, | ||
*, | ||
skipna: bool = True, | ||
axis: Optional[int] = None, | ||
): | ||
return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis) | ||
|
||
|
||
def max(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): | ||
return _minmax(np.max, values=values, mask=mask, skipna=skipna) | ||
def max( | ||
values: np.ndarray, | ||
mask: np.ndarray, | ||
*, | ||
skipna: bool = True, | ||
axis: Optional[int] = None, | ||
): | ||
return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are there doc strings here? if so can you update (can be followup as well) |
||
|
||
|
||
# TODO: axis kwarg | ||
def mean(values: np.ndarray, mask: np.ndarray, skipna: bool = True): | ||
if not values.size or mask.all(): | ||
return libmissing.NA | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
see #38992 (comment)