API: BooleanArray any/all with NA logic (pandas-dev#30062)

proost · Dec 19, 2019 · 1e7a3fc · 1e7a3fc
1 parent 723c7ee
commit 1e7a3fc
Show file tree

Hide file tree

Showing 3 changed files with 174 additions and 5 deletions.
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -281,6 +281,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/arrays/string_.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests arrays/boolean.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/arrays/boolean.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
 fi
 
 ### DOCSTRINGS ###

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -6,6 +6,7 @@
 
 from pandas._libs import lib, missing as libmissing
 from pandas.compat import set_function_name
+from pandas.compat.numpy import function as nv
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import astype_nansafe
@@ -571,6 +572,143 @@ def _values_for_argsort(self) -> np.ndarray:
         data[self._mask] = -1
         return data
 
+    def any(self, skipna: bool = True, **kwargs):
+        """
+        Return whether any element is True.
+
+        Returns False unless there is at least one element that is True.
+        By default, NAs are skipped. If ``skipna=False`` is specified and
+        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
+        is used as for logical operations.
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA values. If the entire array is NA and `skipna` is
+            True, then the result will be False, as for an empty array.
+            If `skipna` is False, the result will still be True if there is
+            at least one element that is True, otherwise NA will be returned
+            if there are NA's present.
+        **kwargs : any, default None
+            Additional keywords have no effect but might be accepted for
+            compatibility with NumPy.
+
+        Returns
+        -------
+        bool or :attr:`pandas.NA`
+
+        See Also
+        --------
+        numpy.any : Numpy version of this method.
+        BooleanArray.all : Return whether all elements are True.
+
+        Examples
+        --------
+
+        The result indicates whether any element is True (and by default
+        skips NAs):
+
+        >>> pd.array([True, False, True]).any()
+        True
+        >>> pd.array([True, False, pd.NA]).any()
+        True
+        >>> pd.array([False, False, pd.NA]).any()
+        False
+        >>> pd.array([], dtype="boolean").any()
+        False
+        >>> pd.array([pd.NA], dtype="boolean").any()
+        False
+
+        With ``skipna=False``, the result can be NA if this is logically
+        required (whether ``pd.NA`` is True or False influences the result):
+
+        >>> pd.array([True, False, pd.NA]).any(skipna=False)
+        True
+        >>> pd.array([False, False, pd.NA]).any(skipna=False)
+        NA
+        """
+        kwargs.pop("axis", None)
+        nv.validate_any((), kwargs)
+
+        values = self._data.copy()
+        np.putmask(values, self._mask, False)
+        result = values.any()
+        if skipna:
+            return result
+        else:
+            if result or len(self) == 0:
+                return result
+            else:
+                return self.dtype.na_value
+
+    def all(self, skipna: bool = True, **kwargs):
+        """
+        Return whether all elements are True.
+
+        Returns True unless there is at least one element that is False.
+        By default, NAs are skipped. If ``skipna=False`` is specified and
+        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
+        is used as for logical operations.
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA values. If the entire array is NA and `skipna` is
+            True, then the result will be True, as for an empty array.
+            If `skipna` is False, the result will still be False if there is
+            at least one element that is False, otherwise NA will be returned
+            if there are NA's present.
+        **kwargs : any, default None
+            Additional keywords have no effect but might be accepted for
+            compatibility with NumPy.
+
+        Returns
+        -------
+        bool or :attr:`pandas.NA`
+
+        See Also
+        --------
+        numpy.all : Numpy version of this method.
+        BooleanArray.any : Return whether any element is True.
+
+        Examples
+        --------
+
+        The result indicates whether any element is True (and by default
+        skips NAs):
+
+        >>> pd.array([True, True, pd.NA]).all()
+        True
+        >>> pd.array([True, False, pd.NA]).all()
+        False
+        >>> pd.array([], dtype="boolean").all()
+        True
+        >>> pd.array([pd.NA], dtype="boolean").all()
+        True
+
+        With ``skipna=False``, the result can be NA if this is logically
+        required (whether ``pd.NA`` is True or False influences the result):
+
+        >>> pd.array([True, True, pd.NA]).all(skipna=False)
+        NA
+        >>> pd.array([True, False, pd.NA]).all(skipna=False)
+        False
+        """
+        kwargs.pop("axis", None)
+        nv.validate_all((), kwargs)
+
+        values = self._data.copy()
+        np.putmask(values, self._mask, True)
+        result = values.all()
+
+        if skipna:
+            return result
+        else:
+            if not result or len(self) == 0:
+                return result
+            else:
+                return self.dtype.na_value
+
     @classmethod
     def _create_logical_method(cls, op):
         def logical_method(self, other):
@@ -667,6 +805,10 @@ def cmp_method(self, other):
         return set_function_name(cmp_method, name, cls)
 
     def _reduce(self, name, skipna=True, **kwargs):
+
+        if name in {"any", "all"}:
+            return getattr(self, name)(skipna=skipna, **kwargs)
+
         data = self._data
         mask = self._mask
 
@@ -678,12 +820,8 @@ def _reduce(self, name, skipna=True, **kwargs):
         op = getattr(nanops, "nan" + name)
         result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
 
-        # if we have a boolean op, don't coerce
-        if name in ["any", "all"]:
-            pass
-
         # if we have numeric op that would result in an int, coerce to int if possible
-        elif name in ["sum", "prod"] and notna(result):
+        if name in ["sum", "prod"] and notna(result):
             int_result = np.int64(result)
             if int_result == result:
                 result = int_result

diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py
@@ -715,6 +715,33 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
         assert isinstance(getattr(s, op)(), np.float64)
 
 
+@pytest.mark.parametrize(
+    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
+    [
+        ([True, pd.NA], True, True, True, pd.NA),
+        ([False, pd.NA], False, False, pd.NA, False),
+        ([pd.NA], False, True, pd.NA, pd.NA),
+        ([], False, True, False, True),
+    ],
+)
+def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
+    # the methods return numpy scalars
+    exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
+    exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
+    exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
+    exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
+
+    for con in [pd.array, pd.Series]:
+        a = con(values, dtype="boolean")
+        assert a.any() is exp_any
+        assert a.all() is exp_all
+        assert a.any(skipna=False) is exp_any_noskip
+        assert a.all(skipna=False) is exp_all_noskip
+
+        assert np.any(a.any()) is exp_any
+        assert np.all(a.all()) is exp_all
+
+
 # TODO when BooleanArray coerces to object dtype numpy array, need to do conversion
 # manually in the indexing code
 # def test_indexing_boolean_mask():