Skip to content

Commit

Permalink
API: BooleanArray any/all with NA logic (pandas-dev#30062)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored and proost committed Dec 19, 2019
1 parent 723c7ee commit 1e7a3fc
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 5 deletions.
4 changes: 4 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pytest -q --doctest-modules pandas/core/arrays/string_.py
RET=$(($RET + $?)) ; echo $MSG "DONE"

# Run the doctests embedded in pandas/core/arrays/boolean.py and add pytest's
# exit status to the running RET total.
MSG='Doctests arrays/boolean.py' ; echo $MSG
pytest -q --doctest-modules pandas/core/arrays/boolean.py
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi

### DOCSTRINGS ###
Expand Down
148 changes: 143 additions & 5 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pandas._libs import lib, missing as libmissing
from pandas.compat import set_function_name
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import astype_nansafe
Expand Down Expand Up @@ -571,6 +572,143 @@ def _values_for_argsort(self) -> np.ndarray:
data[self._mask] = -1
return data

def any(self, skipna: bool = True, **kwargs):
    """
    Return whether any element is True.

    Returns False unless there is at least one element that is True.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be False, as for an empty array.
        If `skipna` is False, the result will still be True if there is
        at least one element that is True, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.any : Numpy version of this method.
    BooleanArray.all : Return whether all elements are True.

    Examples
    --------
    The result indicates whether any element is True (and by default
    skips NAs):

    >>> pd.array([True, False, True]).any()
    True
    >>> pd.array([True, False, pd.NA]).any()
    True
    >>> pd.array([False, False, pd.NA]).any()
    False
    >>> pd.array([], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="boolean").any()
    False

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, False, pd.NA]).any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA]).any(skipna=False)
    NA

    Without missing values, ``skipna=False`` gives the plain boolean result:

    >>> pd.array([False, False], dtype="boolean").any(skipna=False)
    False
    """
    # ``axis`` is accepted for NumPy compatibility but is meaningless for a
    # 1-D array, so drop it before validating the remaining keywords.
    kwargs.pop("axis", None)
    nv.validate_any((), kwargs)

    # Work on a copy so the underlying data is untouched; masked slots are
    # filled with False, the neutral element for "any".
    values = self._data.copy()
    np.putmask(values, self._mask, False)
    result = values.any()

    # The answer is only undetermined (NA) when nothing is True *and* at
    # least one value is actually missing. An empty array has an empty mask,
    # so it is covered by the ``not self._mask.any()`` check as well.
    if skipna or result or not self._mask.any():
        return result
    return self.dtype.na_value

def all(self, skipna: bool = True, **kwargs):
    """
    Return whether all elements are True.

    Returns True unless there is at least one element that is False.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be True, as for an empty array.
        If `skipna` is False, the result will still be False if there is
        at least one element that is False, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.all : Numpy version of this method.
    BooleanArray.any : Return whether any element is True.

    Examples
    --------
    The result indicates whether all elements are True (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA]).all()
    True
    >>> pd.array([True, False, pd.NA]).all()
    False
    >>> pd.array([], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="boolean").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA]).all(skipna=False)
    NA
    >>> pd.array([True, False, pd.NA]).all(skipna=False)
    False

    Without missing values, ``skipna=False`` gives the plain boolean result:

    >>> pd.array([True, True], dtype="boolean").all(skipna=False)
    True
    """
    # ``axis`` is accepted for NumPy compatibility but is meaningless for a
    # 1-D array, so drop it before validating the remaining keywords.
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    # Work on a copy so the underlying data is untouched; masked slots are
    # filled with True, the neutral element for "all".
    values = self._data.copy()
    np.putmask(values, self._mask, True)
    result = values.all()

    # The answer is only undetermined (NA) when nothing is False *and* at
    # least one value is actually missing. An empty array has an empty mask,
    # so it is covered by the ``not self._mask.any()`` check as well.
    if skipna or not result or not self._mask.any():
        return result
    return self.dtype.na_value

@classmethod
def _create_logical_method(cls, op):
def logical_method(self, other):
Expand Down Expand Up @@ -667,6 +805,10 @@ def cmp_method(self, other):
return set_function_name(cmp_method, name, cls)

def _reduce(self, name, skipna=True, **kwargs):

if name in {"any", "all"}:
return getattr(self, name)(skipna=skipna, **kwargs)

data = self._data
mask = self._mask

Expand All @@ -678,12 +820,8 @@ def _reduce(self, name, skipna=True, **kwargs):
op = getattr(nanops, "nan" + name)
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

# if we have a boolean op, don't coerce
if name in ["any", "all"]:
pass

# if we have numeric op that would result in an int, coerce to int if possible
elif name in ["sum", "prod"] and notna(result):
if name in ["sum", "prod"] and notna(result):
int_result = np.int64(result)
if int_result == result:
result = int_result
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/arrays/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,33 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
assert isinstance(getattr(s, op)(), np.float64)


@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    """Check any/all on boolean arrays and Series, with and without skipna."""

    def _as_result(expected):
        # the methods return numpy scalars, so plain bools are converted to
        # np.bool_ to make the identity checks below meaningful
        if expected is pd.NA:
            return pd.NA
        return np.bool_(expected)

    exp_any, exp_all, exp_any_noskip, exp_all_noskip = (
        _as_result(expected)
        for expected in (exp_any, exp_all, exp_any_noskip, exp_all_noskip)
    )

    for constructor in (pd.array, pd.Series):
        obj = constructor(values, dtype="boolean")

        # default behaviour: missing values are skipped
        assert obj.any() is exp_any
        assert obj.all() is exp_all

        # skipna=False: the result may be NA
        assert obj.any(skipna=False) is exp_any_noskip
        assert obj.all(skipna=False) is exp_all_noskip

        # the NumPy reductions applied to the scalar results must agree
        assert np.any(obj.any()) is exp_any
        assert np.all(obj.all()) is exp_all


# TODO when BooleanArray coerces to object dtype numpy array, need to do conversion
# manually in the indexing code
# def test_indexing_boolean_mask():
Expand Down

0 comments on commit 1e7a3fc

Please sign in to comment.